This notebook estimates the indicators based on the raw data and perfomrs the main analyses and figures used in the manuscript of the multicountry paper. The input is the “clean kobo output” that was first cleaned by 1.2_cleaning.

Packages and functions

Load required libraries:

library(tidyr)
library(dplyr)
library(readr)
library(utile.tools)
library(stringr)
library(ggplot2)
library(ggsankey)
library(alluvial)
library(viridis)
library(cowplot)
library(lme4)
library(knitr)

Load required functions. These custom fuctions are available at: https://github.com/AliciaMstt/GeneticIndicators

source("get_indicator1_data.R")
source("get_indicator2_data.R")
source("get_indicator3_data.R")
source("get_metadata.R")
source("transform_to_Ne.R")
source("estimate_indicator1.R")

Other custom functions:

### not in
'%!in%' <- function(x,y)!('%in%'(x,y))


#' Duplicates data to create additional facet. Thanks to https://stackoverflow.com/questions/18933575/easily-add-an-all-facet-to-facet-wrap-in-ggplot2
#' @param df a dataframe
#' @param col the name of facet column
#'  
CreateAllFacet <- function(df, col){
  df$facet <- df[[col]]
  temp <- df
  temp$facet <- "all"
  merged <-rbind(temp, df)

  # ensure the facet value is a factor
  merged[[col]] <- as.factor(merged[[col]])

  return(merged)
}

Custom colors:

## IUCN official colors
# Assuming order of levels is: "re", "cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown" (for regional, and w/o "re" for global). Make sure to change the levels to that order before plotting.
IUCNcolors<-c("brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")
IUCNcolors_regional<-c("darkorchid2", "brown2", "darkorange", "yellow", "green", "darkgreen", "darkgrey", "azure2", "bisque1")

## nice soft ramp for taxonomic groups
taxoncolors<-cividis(12) # same than using cividis(length(levels(as.factor(metadata$taxonomic_group))))

## Colors for simplified methods to define populations 
# assuming the levels (see how this was created in the section "Simplify combinations of methods to define populations"): of running levels(as.factor(ind2_data$defined_populations_simplified)) (after new order)

# get a set of colors to highlight genetic and geographic with similar colors

simplifiedmethods_colors<-c("#FFA07A", #"dispersal_buffer"
                            "#7f611b", # "eco_biogeo_proxies"
                            "#668cd1", # "genetic_clusters"     
                            "#668cd1", # "genetic_clusters eco_biogeo_proxies"     
                            "#45c097", # "genetic_clusters geographic_boundaries"  
                            "#d4b43e", # "geographic_boundaries"
                            "#d4b43e", # "geographic_boundaries eco_biogeo_proxies"
                            "#d4b43e", # "geographic_boundaries management_units" 
                            "#b34656", # "management_units" 
                            "#be72c9", # "other" 
                            "#be72c9")# "other_combinations" 

Get data

Get indicators and metadata data from clean kobo output

# Get data:
kobo_clean<-read.csv(file="kobo_output_clean.csv", header=TRUE)

# Extract indicator 1 data from kobo output, show most relevant columns
ind1_data<-get_indicator1_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind1_data[,c(1:3, 12:14)])
# Extract Proportion of maintained populations (indicator) data from kobo output, show most relevant columns
ind2_data<-get_indicator2_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind2_data[,c(1:3, 9:10,13)])
# Extract indicator 3 data from kobo output, show most relevant columns
ind3_data<-get_indicator3_data(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(ind3_data[,c(1:3, 9:11)])
# extract metadata, show most relevant columns
metadata<-get_metadata(kobo_output=kobo_clean)
## [1] "the data already contained a taxon column, that was used instead of creating a new one"
head(metadata[,c(1:3, 12, 25,26, 64)])

Get population data for those species assessed using the tabular text template instead of Kobo. This file was produced by the script 1.2_cleaning.Rmd

ind1_data_from_templates<-read.csv(file="ind1_data_from_templates.csv")

Add data recorded using the population template to the ind1_data already in the nice format.

ind1_data<-rbind(ind1_data, ind1_data_from_templates)

Estimate indicators

Indicator 1 (proportion of populations with Ne >500):

Show most relevant columns of indicator 1 data

head(ind1_data[,c(1:3, 12:14)])

Remember what the function to transform NcRange and NcPoint data into Ne does:

# check what the custom funciton does
transform_to_Ne
## function (ind1_data, ratio = 0.1) 
## {
##     ratio = ratio
##     if (!is.numeric(ratio) || ratio < 0 || ratio > 1) {
##         stop("Invalid argument. Please provide a number within the range 0 to 1, using `.` to delimit decimals.")
##     }
##     else {
##         ind1_data = ind1_data
##         ind1_data <- ind1_data %>% mutate(Nc_from_range = case_when(NcRange == 
##             "more_5000_bymuch" ~ 10000, NcRange == "more_5000" ~ 
##             5050, NcRange == "less_5000_bymuch" ~ 500, NcRange == 
##             "less_5000" ~ 4050, NcRange == "range_includes_5000" ~ 
##             5001)) %>% mutate(Ne_from_Nc = case_when(!is.na(NcPoint) ~ 
##             NcPoint * ratio, !is.na(Nc_from_range) ~ Nc_from_range * 
##             ratio)) %>% mutate(Ne_combined = if_else(is.na(Ne), 
##             Ne_from_Nc, Ne))
##         print(ind1_data)
##     }
## }

Use function to get Ne data from NcRange or NcPoint data, and their combination (Ne estimated from Ne if Ne is available, otherwise, from Nc)

ind1_data<-transform_to_Ne(ind1_data = ind1_data, ratio = 0.1)
## # A tibble: 5,049 × 38
##    country_assessme… taxonomic_group taxon scientific_auth… genus year_assesment
##    <chr>             <chr>           <chr> <chr>            <chr> <chr>         
##  1 sweden            mammal          Alce… (Linnaeus, 1758) Alces 2023          
##  2 sweden            mammal          Alce… (Linnaeus, 1758) Alces 2023          
##  3 sweden            mammal          Alce… (Linnaeus, 1758) Alces 2023          
##  4 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  5 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  6 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  7 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  8 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
##  9 sweden            fish            Silu… (Linnaeus, 1758) Silu… 2023          
## 10 sweden            bird            Dend… Bechstein 1803   Dend… 2022          
## # … with 5,039 more rows, and 32 more variables: name_assessor <chr>,
## #   email_assessor <chr>, kobo_tabular <chr>, time_populations <chr>,
## #   X_validation_status <chr>, X_uuid <chr>, multiassessment <chr>,
## #   population <chr>, Name <chr>, Origin <chr>, IntroductionYear <chr>,
## #   Ne <dbl>, NeLower <dbl>, NeUpper <dbl>, NeYear <chr>, GeneticMarkers <chr>,
## #   GeneticMarkersOther <chr>, MethodNe <chr>, SourceNe <chr>, NcType <chr>,
## #   NcYear <chr>, NcMethod <chr>, NcRange <chr>, NcRangeDetails <chr>, …

Remember what the function to estimate indicator 1 does:

# check what the custom function does
estimate_indicator1
## function (ind1_data) 
## {
##     indicator1 <- ind1_data %>% group_by(X_uuid, ) %>% summarise(n_pops = n(), 
##         n_pops_Ne_data = sum(!is.na(Ne_combined)), n_pops_more_500 = sum(Ne_combined > 
##             500, na.rm = TRUE), indicator1 = n_pops_more_500/n_pops_Ne_data) %>% 
##         left_join(metadata)
##     print(indicator1)
## }

Now estimate indicator 1 :)

indicator1<-estimate_indicator1(ind1_data = ind1_data)
## Joining, by = "X_uuid"
## # A tibble: 600 × 69
##    X_uuid      n_pops n_pops_Ne_data n_pops_more_500 indicator1 country_assessm…
##    <chr>        <int>          <int>           <int>      <dbl> <chr>           
##  1 010d85cd-5…      2              1               1          1 united_states   
##  2 016d59ae-9…      1              1               0          0 mexico          
##  3 017ff4b6-5…      1              0               0        NaN colombia        
##  4 019bd95f-b…      1              1               0          0 sweden          
##  5 01b10b29-9…      1              1               1          1 south_africa    
##  6 0301e6b3-b…      3              3               3          1 france          
##  7 036baa83-5…      1              0               0        NaN colombia        
##  8 037a15b2-f…      3              2               0          0 colombia        
##  9 037d6c8f-7…      4              2               2          1 united_states   
## 10 03f03179-1…      1              1               1          1 south_africa    
## # … with 590 more rows, and 63 more variables: taxonomic_group <chr>,
## #   taxon <chr>, scientific_authority <chr>, genus <chr>, year_assesment <chr>,
## #   name_assessor <chr>, email_assessor <chr>, common_name <chr>,
## #   kobo_tabular <chr>, X_validation_status <chr>, GBIF_taxonID <int>,
## #   NCBI_taxonID <chr>, national_taxonID <chr>, source_national_taxonID <chr>,
## #   other_populations <chr>, time_populations <chr>, defined_populations <chr>,
## #   source_definition_populations <chr>, map_populations <chr>, …

Proportion of maintained populations (indicator 2) = proportion of populations within species which are maintained.

Proportion of maintained populations (indicator) is the he proportion of populations within species which are maintained. This can be estimated based on the n_extant_populations and n_extint_populations, as follows:

ind2_data$indicator2<- ind2_data$n_extant_populations / (ind2_data$n_extant_populations + ind2_data$n_extint_populations)
head(ind2_data$indicator2)
## [1] 1.0000000 0.5000000 0.2941176 1.0000000 0.3333333 1.0000000

Number of taxa with genetic monitoring squemes (indicator3)

Indicator 3 refers to the number (count) of taxa by country in which genetic monitoring is occurring. This is stored in the variable temp_gen_monitoring as a “yes/no” answer for each taxon, so to estimate the indicator, we only need to count how many said “yes”, keeping only one of the records when the taxon was multiassessed:

indicator3<-ind3_data %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, temp_gen_monitoring) %>%
                 filter(!duplicated(.)) %>%

                 # count "yes" in tem_gen_monitoring by country
                 filter(temp_gen_monitoring=="yes") %>%
                 group_by(country_assessment) %>%
                 summarise(n_taxon_gen_monitoring= n())

Join indicators and metadata in a single table

It could be useful to have the estimated indicator and the metadata in a single large table.

indicators_full<-left_join(metadata, indicator1) %>% 
                     left_join(ind2_data) %>% 
                     left_join(ind3_data)
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "common_name", "kobo_tabular", "X_validation_status",
## "X_uuid", "GBIF_taxonID", "NCBI_taxonID", "national_taxonID",
## "source_national_taxonID", "other_populations", "time_populations",
## "defined_populations", "source_definition_populations", "map_populations",
## "map_populations_URL", "habitat_decline_area", "source_populations",
## "popsize_data", "ne_pops_exists", "nc_pops_exists", "ratio_exists",
## "species_related", "ratio_species_related", "ratio_year",
## "source_popsize_ratios", "species_comments", "realm", "IUCN_habitat",
## "other_habitat", "national_endemic", "transboundary_type", "other_explain",
## "country_proportion", "species_range", "rarity", "occurrence_extent",
## "occurrence_area", "pop_fragmentation_level", "species_range_comments",
## "global_IUCN", "regional_redlist", "other_assessment_status",
## "other_assessment_name", "source_status_distribution", "fecundity",
## "semelparous_offpring", "reproductive_strategy", "reproductive_strategy_other",
## "adult_age_data", "other_reproductive_strategy", "longevity_max",
## "longevity_median", "longevity_maturity", "longevity_age",
## "life_history_based_on", "life_history_sp_basedon", "sources_life_history",
## "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "other_populations",
## "time_populations", "defined_populations", "source_definition_populations",
## "map_populations", "map_populations_URL", "habitat_decline_area",
## "source_populations", "multiassessment")
## Joining, by = c("country_assessment", "taxonomic_group", "taxon",
## "scientific_authority", "genus", "year_assesment", "name_assessor",
## "email_assessor", "X_validation_status", "X_uuid", "multiassessment")

Save indicators data

Save indicators data and metadata to csv files, useful for analyses outside R.

# save processed data
write.csv(ind1_data, "ind1_data.csv", row.names = FALSE)
write.csv(indicators_full, "indicators_full.csv", row.names = FALSE)
write.csv(ind2_data, "ind2_data.csv", row.names = FALSE)
write.csv(ind3_data, "ind3_data.csv", row.names = FALSE)
write.csv(metadata, "metadata.csv", row.names = FALSE)

Change country name to nicer labels

To have nice levels in the plots we will change the way country names are written:

# make factor
metadata$country_assessment<-as.factor(metadata$country_assessment)
indicators_full$country_assessment<-as.factor(indicators_full$country_assessment)
ind2_data$country_assessment<-as.factor(ind2_data$country_assessment)
ind1_data$country_assessment<-as.factor(ind1_data$country_assessment)
indicator1$country_assessment<-as.factor(indicator1$country_assessment)

# original levels
levels(metadata$country_assessment)
## [1] "australia"     "belgium"       "colombia"      "france"       
## [5] "japan"         "mexico"        "south_africa"  "sweden"       
## [9] "united_states"
# change
levels(metadata$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(indicators_full$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind1_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(ind2_data$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")
levels(indicator1$country_assessment)<-c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US")

Simplify combinations of methods to define populations

The methods used to define populations come from a check box question were one or more of the following categories can be selected: genetic_clusters, geographic_boundaries, eco_biogeo_proxies, adaptive_traits, management_units, other. As a consequence any combination of the former can be possible. Leading to the following frequency table:

table(indicators_full$defined_populations)
## 
##                                                                            adaptive_traits 
##                                                                                          5 
##                                                           adaptive_traits management_units 
##                                                                                          1 
##                                                                           dispersal_buffer 
##                                                                                        159 
##                                                           dispersal_buffer adaptive_traits 
##                                                                                          2 
##                                                        dispersal_buffer eco_biogeo_proxies 
##                                                                                          1 
##                                                                     dispersal_buffer other 
##                                                                                          1 
##                                                                         eco_biogeo_proxies 
##                                                                                         44 
##                                                         eco_biogeo_proxies adaptive_traits 
##                                                                                          3 
##                                                        eco_biogeo_proxies dispersal_buffer 
##                                                                                          7 
##                                                        eco_biogeo_proxies management_units 
##                                                                                          3 
##                                                                   eco_biogeo_proxies other 
##                                                                                          2 
##                                                                           genetic_clusters 
##                                                                                        108 
##                                                           genetic_clusters adaptive_traits 
##                                                                                          7 
##                                                          genetic_clusters dispersal_buffer 
##                                                                                         11 
##                                                        genetic_clusters eco_biogeo_proxies 
##                                                                                         26 
##                                        genetic_clusters eco_biogeo_proxies adaptive_traits 
##                                                                                          3 
##                       genetic_clusters eco_biogeo_proxies adaptive_traits management_units 
##                                                                                          2 
##                                       genetic_clusters eco_biogeo_proxies management_units 
##                                                                                          1 
##                                                     genetic_clusters geographic_boundaries 
##                                                                                         69 
##                                     genetic_clusters geographic_boundaries adaptive_traits 
##                                                                                          5 
##                                  genetic_clusters geographic_boundaries eco_biogeo_proxies 
##                                                                                          8 
##                  genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits 
##                                                                                          1 
## genetic_clusters geographic_boundaries eco_biogeo_proxies adaptive_traits management_units 
##                                                                                          1 
##                 genetic_clusters geographic_boundaries eco_biogeo_proxies management_units 
##                                                                                          1 
##                                    genetic_clusters geographic_boundaries management_units 
##                                                                                          8 
##                                                          genetic_clusters management_units 
##                                                                                          5 
##                                                                     genetic_clusters other 
##                                                                                          2 
##                                                                      geographic_boundaries 
##                                                                                        274 
##                                                      geographic_boundaries adaptive_traits 
##                                                                                         12 
##                               geographic_boundaries adaptive_traits management_units other 
##                                                                                          1 
##                                                     geographic_boundaries dispersal_buffer 
##                                                                                          1 
##                                                   geographic_boundaries eco_biogeo_proxies 
##                                                                                        105 
##                                   geographic_boundaries eco_biogeo_proxies adaptive_traits 
##                                                                                          3 
##                                  geographic_boundaries eco_biogeo_proxies management_units 
##                                                                                          3 
##                                             geographic_boundaries eco_biogeo_proxies other 
##                                                                                          2 
##                                                     geographic_boundaries management_units 
##                                                                                         24 
##                                                                geographic_boundaries other 
##                                                                                         12 
##                                                                           management_units 
##                                                                                         29 
##                                                                     management_units other 
##                                                                                          1 
##                                                                                      other 
##                                                                                         19

It is hard to group the above methods, so we will keep the original groups with n >=19 in the above list, and tag the combinations that appear few times as as “other_combinations”.

Which groups have n>=19?

x<-as.data.frame(table(indicators_full$defined_populations)[table(indicators_full$defined_populations) >= 19])
colnames(x)[1]<-"method"

x

We can add this new column to the metadata and Proportion of maintained populations (indicator) data:

### for ind data
indicators_full<- indicators_full %>% 
  mutate(defined_populations_simplified = case_when(
         # if the method is in the list of methods n>=19 then keep it
         defined_populations %in% x$method ~ defined_populations,
         TRUE ~ "other_combinations"))


### for meta
metadata<- metadata %>% 
  mutate(defined_populations_simplified = case_when(
         # if the method is in the list of methods n>=19 then keep it
         defined_populations %in% x$method ~ defined_populations,
         TRUE ~ "other_combinations"))

Check n for simplified methods:

table(indicators_full$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      159 
##                       eco_biogeo_proxies 
##                                       44 
##                         genetic_clusters 
##                                      108 
##      genetic_clusters eco_biogeo_proxies 
##                                       26 
##   genetic_clusters geographic_boundaries 
##                                       69 
##                    geographic_boundaries 
##                                      274 
## geographic_boundaries eco_biogeo_proxies 
##                                      105 
##   geographic_boundaries management_units 
##                                       24 
##                         management_units 
##                                       29 
##                                    other 
##                                       19 
##                       other_combinations 
##                                      115

Table of equivalences:

indicators_full %>% 
       select(defined_populations, defined_populations_simplified) %>% 
       filter(!duplicated(defined_populations))

Create nicer names for ploting

# original method names
levels(as.factor(indicators_full$defined_populations_simplified))
##  [1] "dispersal_buffer"                        
##  [2] "eco_biogeo_proxies"                      
##  [3] "genetic_clusters"                        
##  [4] "genetic_clusters eco_biogeo_proxies"     
##  [5] "genetic_clusters geographic_boundaries"  
##  [6] "geographic_boundaries"                   
##  [7] "geographic_boundaries eco_biogeo_proxies"
##  [8] "geographic_boundaries management_units"  
##  [9] "management_units"                        
## [10] "other"                                   
## [11] "other_combinations"
# nicer names
nice_names <- c("dispersal buffer",
                "eco- biogeographic proxies",
                 "genetic clusters",
                 "genetic clusters & eco- biogeographic proxies",
                 "genetic clusters & geographic boundaries",
                 "geographic boundaries",
                 "geographic boundaries & eco- biogeographic proxies",
                 "geographic boundaries & management units",
                 "management units",
                 "other", 
                 "other combinations")


### add them
indicators_full$defined_populations_nicenames <- factor(
    indicators_full$defined_populations_simplified,
    levels = levels(as.factor(indicators_full$defined_populations_simplified)),
    labels = nice_names)

# metadata
metadata$defined_populations_nicenames <- factor(
    metadata$defined_populations_simplified,
    levels = levels(as.factor(metadata$defined_populations_simplified)),
    labels = nice_names)

#check names match
select(metadata, defined_populations_nicenames, defined_populations_simplified)
levels(indicators_full$defined_populations_nicenames)
##  [1] "dispersal buffer"                                  
##  [2] "eco- biogeographic proxies"                        
##  [3] "genetic clusters"                                  
##  [4] "genetic clusters & eco- biogeographic proxies"     
##  [5] "genetic clusters & geographic boundaries"          
##  [6] "geographic boundaries"                             
##  [7] "geographic boundaries & eco- biogeographic proxies"
##  [8] "geographic boundaries & management units"          
##  [9] "management units"                                  
## [10] "other"                                             
## [11] "other combinations"

Averaging multiassessments

Some taxa were assessed twice or more times, for example to account for uncertainty on how to divide populations. This information is stored in variable multiassessment of the metadata (created by get_metadata()). An example of taxa with multiple assessments:

metadata %>%
filter(multiassessment=="multiassessment")  %>%
  select(taxonomic_group, taxon, country_assessment, multiassessment) %>%
  arrange(taxon, country_assessment) %>%
  head()

Multiassessments allow to account for uncertainty in the number of populations or the size of them. We can examine how the indicators value species by species as done elsewhere in these analyses (see below “Values for indicator 1 and 2 for multiassessed species), but to examine global trends, some of the figures below use the average.

indicators_averaged<-indicators_full %>%
  # group desired multiassessments
  group_by(country_assessment, multiassessment, taxon) %>%
  # estimate means
  mutate(indicator1_mean=mean(indicator1, na.rm=TRUE)) %>%
  mutate(indicator2_mean=mean(indicator2, na.rm=TRUE)) %>%
  # change NaN for NA (needed due to the NAs and 0s in the dataset)
  mutate_all(~ifelse(is.nan(.), NA, .)) 
## `mutate_all()` ignored the following grouping variables:
## • Columns `country_assessment`, `multiassessment`, `taxon`
## ℹ Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.

Examples of how this looks to check it was done properly. For indicator 1:

indicators_averaged %>%
  filter(taxon == "Barbastella barbastellus") %>%
  select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
indicators_averaged %>%
  filter(taxon == "Rana dalmatina") %>%
  select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)
indicators_averaged %>%
  filter(taxon == "Ambystoma cingulatum") %>%
  select(taxon, country_assessment, multiassessment, indicator1, indicator1_mean)

For Proportion of maintained populations (indicator):

indicators_averaged %>%
  filter(taxon == "Ambystoma cingulatum") %>%
  select(taxon, country_assessment, multiassessment, indicator2, indicator2_mean)

Because we will use the averages to show a single value for multiasssessed taxa, we can keep only the first record for multiassessed taxa.

indicators_averaged_one<-indicators_averaged[!duplicated(cbind(indicators_averaged$taxon, indicators_averaged$country_assessment)), ]

General description of records and taxa assessed by country

Records by country, including taxa assessed more than once (see below for details on this)

ggplot(metadata, aes(x=country_assessment)) + 
  geom_bar(stat = "count") +
  xlab("") +
  ggtitle("Number of taxa assessed by country, including taxa assed more than once") +
  theme_light()

To explore what kind of taxa countries assessed regardless of if they assessed them once or more, we are going to use the subset indicators_averaged_one, were we averaged the indicators and kept only 1 record per assessment.

How many taxa were assessed (i.e. counting only once taxa that were assessed multiple times)?

# how many?
nrow(indicators_averaged_one)
## [1] 909

Plot taxa assessed excluding duplicates, i.e. the real number of taxa assessed:

ggplot(indicators_averaged_one, aes(x=country_assessment)) + 
  geom_bar(stat = "count") +
  xlab("") +
  ggtitle("Number of taxa assessed by country") +
  theme_light()

Of which countries and taxonomic groups are the taxa that were assessed more than once?

indicators_averaged_one %>% # we use the _unique dataset so that multiassesed records are counted only once
        filter(multiassessment=="multiassessment") %>%

ggplot(aes(x=taxonomic_group, fill=country_assessment)) + 
  geom_bar(stat = "count") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(fill="Country") +
  xlab("") +
  ggtitle("Number of taxa assessed more than once") +
  theme_light()

Population size data (Has Nc or Ne? what type of Nc?)

Population size data availability by country

Countries have population size data (Nc or Ne) regardless of the taxonomic group.

ggplot(metadata, aes(x=taxonomic_group, fill=popsize_data)) + 
  geom_bar(stat = "count") +
  coord_flip() +
  facet_wrap(~country_assessment, ncol = 5) +
  scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
                    breaks=c("yes", "data_for_species", "insuff_data_species"),
                    labels=c("Population level", "Species or subspecies level", "Insufficient data")) +
  labs(fill="Population size data availability",
       x="",
       y="Number of taxa (including records of taxa assessed more than once)") +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="top")

Same plot but including a panel for the entire dataset:

## Duplicate data with an additional column "facet"

df<-CreateAllFacet(metadata, "country_assessment")

# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))

# Plot
ggplot(df, aes(x=taxonomic_group, fill=popsize_data)) + 
  geom_bar(stat = "count") +
  coord_flip() +
  facet_wrap(~facet, ncol = 5, scales="free_x") +
  scale_fill_manual(values=c("#2ca02c", "#1f77b4", "grey80"),
                    breaks=c("yes", "data_for_species", "insuff_data_species"),
                    labels=c("Population level", "Species or subspecies level", "Insufficient data")) +  labs(fill="Population size data availability",
       x="",
       y="Number of taxa (including records of taxa assessed more than once)") +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="top")

Population size data availability in the entire dataset:

ggplot(metadata, aes(x=taxonomic_group, fill=popsize_data)) + 
  geom_bar(stat = "count") +
  coord_flip() +
  scale_fill_manual(values=c("#1f77b4", "grey80", "#2ca02c"),
                    breaks=c(levels(as.factor(metadata$popsize_data))),
                                        labels=c("Species level or subspecies level", "Insufficient data", "Population level")) +
  labs(fill="Population size data availability",
       x="",
       y="Number of taxa (including records of taxa assessed more than once)") +
  theme_light() +
  theme(legend.position="right")

Ne data yes or not? & Type of Nc data

Ne available by taxa?

p1<- metadata %>% 
  filter(!is.na(ne_pops_exists)) %>% 
  filter(ne_pops_exists!="other_genetic_info") %>%
    ggplot(aes(x=country_assessment, fill=ne_pops_exists)) + 
  geom_bar() +
scale_fill_manual(labels=c("no", "yes"),
                      breaks=c("no_genetic_data", "ne_available"),
                      values=c("#ff7f0e", "#2ca02c")) +
xlab("") +
ylab("Number of taxa") +
labs(fill="Ne available \n(from genetic data)")  +
theme_light() +
theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())

p1

Nc data available by taxa?

p2<-metadata %>%
  filter(!is.na(nc_pops_exists)) %>%
    ggplot(aes(x=country_assessment, fill=nc_pops_exists)) +
    geom_bar() +
    scale_fill_manual(values=c("#ff7f0e", "#2ca02c")) + 
    labs(fill="Nc available") +
    xlab("") +
    ylab("Number of taxa") +
    theme_light() +
    theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p2

What kind of Nc data? (dodge bars)

ind1_data %>%
  filter(!is.na(NcType)) %>%
  ggplot(aes(x=country_assessment, fill=NcType))+
  geom_bar(position = "dodge") +
  scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
                      breaks=c("Nc_point", "Nc_range", "unknown"),
                      values=c("#0072B2", "#E69F00", "grey80")) +
  xlab("") +
  ylab("Number of populations") +
  labs(fill="Type of Nc data \nby population") +
  theme_light() +
  theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())

What kind of Nc data? (fill bars)

p3<-ind1_data %>%
  filter(!is.na(NcType)) %>%
  ggplot(aes(x=country_assessment, fill=NcType))+
  geom_bar(position = "fill") +
  scale_fill_manual(labels=c("Point", "Range \nor qualitative", "Unknown"),
                      breaks=c("Nc_point", "Nc_range", "unknown"),
                      values=c("#0072B2", "#E69F00", "grey80")) +
  xlab("") +
  ylab("Proportion of populations") +
  labs(fill="Type of Nc data \nby population") +
  theme_light() +
  theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p3

Missing data on extant and extinct populations

We have NA in Proportion of maintained populations (indicator) because in some cases the number of extinct populations is unknown, therefore the operation cannot be computed.

Counts

Total records with NA in extant populations:

sum(is.na(indicators_full$n_extant_populations))
## [1] 19

Taxa with NA in extant populations:

indicators_full %>%
  filter(is.na(n_extant_populations)) %>%
    select(country_assessment, taxonomic_group, taxon, n_extant_populations, n_extint_populations)

Total taxa with NA in extinct populations:

sum(is.na(indicators_full$n_extint_populations))
## [1] 379

Do taxa with NA for extant also have NA for extinct?

indicators_full$taxon[is.na(indicators_full$n_extant_populations)] %in% indicators_full$taxon[is.na(indicators_full$n_extint_populations)]
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE

So out of the 972, we have 379 records with NA in n_extinct and 19 records with NA in n_extant. Of them, 19 have NA in both n_extant and n_extinct.

Plot missing data extinct populations

p4<-indicators_full %>%
  ggplot(aes(x=country_assessment, fill=is.na(n_extint_populations))) +
  geom_bar() +
  scale_fill_manual(labels=c("number of populations known", "missing data"),
                    values=c("#2ca02c", "#ff7f0e")) + 
  labs(fill="Extinct populations") +
  xlab("") + ylab("Number of taxa") +
  theme_light() +
  theme(text = element_text(size = 13), legend.position = "right", panel.border = element_blank())
p4

Plot missing data extinct populations by country

Missing data in number of extinct populations by method to define populations:

indicators_full %>%
  ggplot(aes(x=defined_populations_nicenames, fill=is.na(n_extint_populations)))+
  geom_bar() +
  coord_flip()+ 
  scale_fill_manual(labels=c("number of populations known", "missing data"),
                    values=c("#2ca02c", "#ff7f0e")) + 
  labs(fill="Extinct populations") +
  xlab("") + ylab("Number of taxa") +
  facet_wrap(country_assessment ~., nrow = 3, scales="free_x") +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="top")

Distribution of Nc, Ne, types of Nc and missing extinct pops in a single figure with 4 panels:

Distribution of Nc, Ne and types of Ne in a single figure with 3 panels, using count for a & b, and proportions for c:

# plot
plot_grid(p1 + theme(legend.justification = c(0,.5)), # legend.justification aligns legends
          p2 + theme(legend.justification = c(0,.5)),
          p3 + theme(legend.justification = c(0,.5)),
          p4 + theme(legend.justification = c(0,.5)),
          ncol=1, rel_widths = c(1,1,1,1), align = "v", labels=c("a)", "b)", "c)", "d)"), vjust = .7)  

Method to define populations used by country (alluvial)

Reformat data

select(metadata, defined_populations_nicenames, defined_populations_simplified)
# reformat data
foralluvial<-metadata %>% group_by(country_assessment, defined_populations_nicenames, taxonomic_group) %>%
             summarise(n=n()) 
## `summarise()` has grouped output by 'country_assessment',
## 'defined_populations_nicenames'. You can override using the `.groups` argument.
# define colors
my_cols<- simplifiedmethods_colors

# we need a vector of colors by country for each row of the dataset, so:
methodspop<-as.factor(foralluvial$defined_populations_nicenames)
levels(methodspop)<-my_cols
methodspop<-as.vector(methodspop)
head(methodspop)
## [1] "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#668cd1" "#45c097"

Plot

# plot
alluvial(foralluvial[,1:3], freq = foralluvial$n,
         col=methodspop, 
         blocks=FALSE,
         gap.width = 0.5,
         cex=.8, 
         xw = 0.1,
         cw = 0.2,
         border = NA,
         alpha = .7)

Effect of method used to define populations on number of populations and PM and Ne>500 indicators

The analyses and plots below us a subset of data filtering outliers (>500 populations) and using the simplified methods (see above). Multiassessed species are considered independently (each assessment is a data point).

Plot Number of maintained populations by country and method

indicators_full %>% 
  filter(n_extant_populations<500) %>% # filter outliers
  # order countries vertically by similar number of pops
  mutate(country_assessment = factor(country_assessment, 
                                     levels=c("Colombia", "Australia", "Belgium",
                                               "Mexico", "France", "US", 
                                               "S. Africa", "Japan", "Sweden"))) %>%
  ggplot(aes(x=defined_populations_nicenames, y=n_extant_populations, 
             fill=defined_populations_nicenames, color=defined_populations_nicenames)) +
          geom_boxplot() +
          geom_jitter(size=.3, width = 0.1, color="black") +
  coord_flip() +
  facet_wrap(country_assessment ~ ., nrow=3, scales="free_x") +
  xlab("")  +
  ylab("Number of maintained populations") +
  scale_fill_manual(values=alpha(simplifiedmethods_colors, .3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        text = element_text(size = 15)) 

levels(indicators_full$country_assessment)
## [1] "Australia" "Belgium"   "Colombia"  "France"    "Japan"     "Mexico"   
## [7] "S. Africa" "Sweden"    "US"

(a) Does the number of maintained pops vary with method used?

Plot number of populations by method

# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
                    filter(!is.na(n_extant_populations)) %>% 
                    filter(n_extant_populations<500) %>%
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(!is.na(n_extant_populations)) %>% 
  filter(n_extant_populations<500) %>%
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
  mutate(myaxis = factor(myaxis, 
                  levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
# plot for number of pops
  p1<- df %>%
  ggplot(aes(x=myaxis, y=n_extant_populations, color=defined_populations_nicenames,
                                               fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Number of maintained populations") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +  
  theme(text = element_text(size = 13))
p1

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(n_extant_populations)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      154 
##                       eco_biogeo_proxies 
##                                       44 
##                         genetic_clusters 
##                                      106 
##      genetic_clusters eco_biogeo_proxies 
##                                       25 
##   genetic_clusters geographic_boundaries 
##                                       69 
##                    geographic_boundaries 
##                                      270 
## geographic_boundaries eco_biogeo_proxies 
##                                      104 
##   geographic_boundaries management_units 
##                                       24 
##                         management_units 
##                                       29 
##                                    other 
##                                       14 
##                       other_combinations 
##                                      112
# total n
nrow(data_for_model)
## [1] 951
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model asking: Does the number of maintained pops vary with method used? Test controlling for variation in the number of maintaiend populations among countries:

m1<-glmer(data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified + 
            (1|data_for_model$country_assessment), family ="poisson")

See results:

summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: 
## data_for_model$n_extant_populations ~ data_for_model$defined_populations_simplified +  
##     (1 | data_for_model$country_assessment)
## 
##      AIC      BIC   logLik deviance df.resid 
##  28559.8  28618.1 -14267.9  28535.8      939 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -6.331 -2.869 -1.224  0.289 75.935 
## 
## Random effects:
##  Groups                            Name        Variance Std.Dev.
##  data_for_model$country_assessment (Intercept) 1.033    1.017   
## Number of obs: 951, groups:  data_for_model$country_assessment, 9
## 
## Fixed effects:
##                                                                                       Estimate
## (Intercept)                                                                            2.36570
## data_for_model$defined_populations_simplifieddispersal_buffer                         -0.96158
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        0.02632
## data_for_model$defined_populations_simplifiedgenetic_clusters                         -1.25092
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -1.48796
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.19472
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.02986
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units   -0.11945
## data_for_model$defined_populations_simplifiedmanagement_units                         -0.50898
## data_for_model$defined_populations_simplifiedother                                    -1.24568
## data_for_model$defined_populations_simplifiedother_combinations                       -0.57163
##                                                                                       Std. Error
## (Intercept)                                                                              0.33938
## data_for_model$defined_populations_simplifieddispersal_buffer                            0.05237
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          0.03225
## data_for_model$defined_populations_simplifiedgenetic_clusters                            0.06161
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         0.08928
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      0.03467
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    0.03955
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      0.05064
## data_for_model$defined_populations_simplifiedmanagement_units                            0.05381
## data_for_model$defined_populations_simplifiedother                                       0.11149
## data_for_model$defined_populations_simplifiedother_combinations                          0.03461
##                                                                                       z value
## (Intercept)                                                                             6.971
## data_for_model$defined_populations_simplifieddispersal_buffer                         -18.362
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.816
## data_for_model$defined_populations_simplifiedgenetic_clusters                         -20.303
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -16.667
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     5.617
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.755
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    -2.359
## data_for_model$defined_populations_simplifiedmanagement_units                          -9.459
## data_for_model$defined_populations_simplifiedother                                    -11.173
## data_for_model$defined_populations_simplifiedother_combinations                       -16.515
##                                                                                       Pr(>|z|)
## (Intercept)                                                                           3.16e-12
## data_for_model$defined_populations_simplifieddispersal_buffer                          < 2e-16
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.4144
## data_for_model$defined_populations_simplifiedgenetic_clusters                          < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       < 2e-16
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries   1.94e-08
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.4503
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     0.0183
## data_for_model$defined_populations_simplifiedmanagement_units                          < 2e-16
## data_for_model$defined_populations_simplifiedother                                     < 2e-16
## data_for_model$defined_populations_simplifiedother_combinations                        < 2e-16
##                                                                                          
## (Intercept)                                                                           ***
## data_for_model$defined_populations_simplifieddispersal_buffer                         ***
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          
## data_for_model$defined_populations_simplifiedgenetic_clusters                         ***
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      ***
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries   ***
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units   *  
## data_for_model$defined_populations_simplifiedmanagement_units                         ***
## data_for_model$defined_populations_simplifiedother                                    ***
## data_for_model$defined_populations_simplifiedother_combinations                       ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                                    (Intr) dt_fr_mdl$dfnd_ppltns_smplfdd_
## dt_fr_mdl$dfnd_ppltns_smplfdd_     -0.036                               
## dt_fr_$____                        -0.015  0.089                        
## dt_fr_mdl$dfnd_ppltns_smplfdg_     -0.015  0.082                        
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ -0.006  0.034                        
## dt_f_$___g_                        -0.021  0.098                        
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ -0.023  0.070                        
## dt_f_$___m_                        -0.011  0.059                        
## dt_fr_mdl$dfnd_ppltns_smplfdm_     -0.010  0.057                        
## dt_fr_md$__                        -0.005  0.028                        
## dt_fr_mdl$dfnd_ppltns_smplfdt_     -0.029  0.425                        
##                                    d__$____ dt_fr_mdl$dfnd_ppltns_smplfdg_
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                            
## dt_fr_$____                                                               
## dt_fr_mdl$dfnd_ppltns_smplfdg_      0.092                                 
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__  0.094    0.042                        
## dt_f_$___g_                         0.172    0.132                        
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__  0.223    0.072                        
## dt_f_$___m_                         0.139    0.060                        
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.141    0.057                        
## dt_fr_md$__                         0.069    0.031                        
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.183    0.121                        
##                                    dt_fr_mdl$dfnd_ppltns_smplfdgn_e__ d__$_g
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                              
## dt_fr_$____                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                          
## dt_f_$___g_                         0.068                                   
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__  0.079                              0.138
## dt_f_$___m_                         0.050                              0.112
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.050                              0.106
## dt_fr_md$__                         0.025                              0.054
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.071                              0.179
##                                    dt_fr_mdl$dfnd_ppltns_smplfdgg_e__ d__$_m
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                              
## dt_fr_$____                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                          
## dt_f_$___g_                                                                 
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__                                          
## dt_f_$___m_                         0.117                                   
## dt_fr_mdl$dfnd_ppltns_smplfdm_      0.118                              0.080
## dt_fr_md$__                         0.057                              0.038
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.151                              0.112
##                                    dt_fr_mdl$dfnd_ppltns_smplfdm_ dt__$__
## dt_fr_mdl$dfnd_ppltns_smplfdd_                                           
## dt_fr_$____                                                              
## dt_fr_mdl$dfnd_ppltns_smplfdg_                                           
## dt_fr_mdl$dfnd_ppltns_smplfdgn_e__                                       
## dt_f_$___g_                                                              
## dt_fr_mdl$dfnd_ppltns_smplfdgg_e__                                       
## dt_f_$___m_                                                              
## dt_fr_mdl$dfnd_ppltns_smplfdm_                                           
## dt_fr_md$__                         0.038                                
## dt_fr_mdl$dfnd_ppltns_smplfdt_      0.110                          0.055

(b) Does the proportion of maintained populations (indicator2) vary with method used?

Plot

# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
                    filter(!is.na(indicator2)) %>% 
                    filter(n_extant_populations<500) %>% 
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator2)) %>% 
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
  mutate(myaxis = factor(myaxis, 
                  levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot for Proportion of maintained populations (indicator)
p2<- df %>%
  filter(n_extant_populations<500) %>%
  ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,    
                                     fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Proportion of maintained populations") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +
  theme(text = element_text(size = 13))
p2

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator2)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                       78 
##                       eco_biogeo_proxies 
##                                       32 
##                         genetic_clusters 
##                                       51 
##      genetic_clusters eco_biogeo_proxies 
##                                       18 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      176 
## geographic_boundaries eco_biogeo_proxies 
##                                       68 
##   geographic_boundaries management_units 
##                                       17 
##                         management_units 
##                                       23 
##                                    other 
##                                        9 
##                       other_combinations 
##                                       78
# total n
nrow(data_for_model)
## [1] 591
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model asking: Does indicator 2 vary with method used? Controlling for variation in indicator2 among countries:

m2<-glm(data_for_model$indicator2 ~ data_for_model$defined_populations_simplified, family ="quasibinomial")

See results:

summary(m2)
## 
## Call:
## glm(formula = data_for_model$indicator2 ~ data_for_model$defined_populations_simplified, 
##     family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8257  -0.3189   0.3665   0.6366   0.6902  
## 
## Coefficients:
##                                                                                       Estimate
## (Intercept)                                                                            1.40198
## data_for_model$defined_populations_simplifieddispersal_buffer                         -0.08876
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        0.12760
## data_for_model$defined_populations_simplifiedgenetic_clusters                          1.26462
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       1.30795
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.44166
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.09142
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    0.65459
## data_for_model$defined_populations_simplifiedmanagement_units                         -0.06713
## data_for_model$defined_populations_simplifiedother                                     0.05527
## data_for_model$defined_populations_simplifiedother_combinations                        0.47163
##                                                                                       Std. Error
## (Intercept)                                                                              0.12385
## data_for_model$defined_populations_simplifieddispersal_buffer                            0.21950
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          0.32665
## data_for_model$defined_populations_simplifiedgenetic_clusters                            0.39171
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         0.64937
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      0.32220
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    0.23948
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      0.51546
## data_for_model$defined_populations_simplifiedmanagement_units                            0.35794
## data_for_model$defined_populations_simplifiedother                                       0.57065
## data_for_model$defined_populations_simplifiedother_combinations                          0.25074
##                                                                                       t value
## (Intercept)                                                                            11.320
## data_for_model$defined_populations_simplifieddispersal_buffer                          -0.404
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.391
## data_for_model$defined_populations_simplifiedgenetic_clusters                           3.228
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        2.014
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     1.371
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.382
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     1.270
## data_for_model$defined_populations_simplifiedmanagement_units                          -0.188
## data_for_model$defined_populations_simplifiedother                                      0.097
## data_for_model$defined_populations_simplifiedother_combinations                         1.881
##                                                                                       Pr(>|t|)
## (Intercept)                                                                            < 2e-16
## data_for_model$defined_populations_simplifieddispersal_buffer                          0.68610
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        0.69621
## data_for_model$defined_populations_simplifiedgenetic_clusters                          0.00131
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.04445
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.17098
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.70280
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    0.20462
## data_for_model$defined_populations_simplifiedmanagement_units                          0.85129
## data_for_model$defined_populations_simplifiedother                                     0.92288
## data_for_model$defined_populations_simplifiedother_combinations                        0.06049
##                                                                                          
## (Intercept)                                                                           ***
## data_for_model$defined_populations_simplifieddispersal_buffer                            
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          
## data_for_model$defined_populations_simplifiedgenetic_clusters                         ** 
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      *  
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      
## data_for_model$defined_populations_simplifiedmanagement_units                            
## data_for_model$defined_populations_simplifiedother                                       
## data_for_model$defined_populations_simplifiedother_combinations                       .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.4278736)
## 
##     Null deviance: 256.01  on 590  degrees of freedom
## Residual deviance: 245.48  on 580  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

(c) Does the proportion of populations with Ne>500 (indicator1) vary with method used?

Plot

# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
                    filter(!is.na(indicator1)) %>% 
                    filter(n_extant_populations<500) %>% 
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator1)) %>% 
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = paste0(defined_populations_nicenames, " (n= ", num, ")")) %>%
#myaxis needs levels in the same order than defined_populations_nicenames
  mutate(myaxis = factor(myaxis, 
                  levels=levels(as.factor(myaxis))[c(1,12,2:11,13)])) # reorders levels
## Joining, by = "defined_populations_nicenames"
## plot 
p3<- df %>%
  ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,    
                                     fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Proportion of populations with Ne>500") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev) +
  theme(text = element_text(size = 13))
p3

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator1)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      138 
##                       eco_biogeo_proxies 
##                                       16 
##                         genetic_clusters 
##                                       58 
##      genetic_clusters eco_biogeo_proxies 
##                                        8 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      156 
## geographic_boundaries eco_biogeo_proxies 
##                                       54 
##   geographic_boundaries management_units 
##                                       20 
##                         management_units 
##                                       13 
##                                    other 
##                                        6 
##                       other_combinations 
##                                       66
# total n
nrow(data_for_model)
## [1] 576
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model asking: Does indicator 1 vary with method used? Controlling for variation in indicator1 among countries:

m3<-glm(data_for_model$indicator1 ~ data_for_model$defined_populations_simplified, family ="quasibinomial")

See results:

summary(m3)
## 
## Call:
## glm(formula = data_for_model$indicator1 ~ data_for_model$defined_populations_simplified, 
##     family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3232  -0.6993  -0.6875   0.5900   1.8815  
## 
## Coefficients:
##                                                                                        Estimate
## (Intercept)                                                                           -1.283644
## data_for_model$defined_populations_simplifieddispersal_buffer                         -0.038474
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                       -0.001555
## data_for_model$defined_populations_simplifiedgenetic_clusters                          0.794217
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       1.186345
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.695198
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.482947
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    0.704802
## data_for_model$defined_populations_simplifiedmanagement_units                         -0.299649
## data_for_model$defined_populations_simplifiedother                                     1.620116
## data_for_model$defined_populations_simplifiedother_combinations                        0.393066
##                                                                                       Std. Error
## (Intercept)                                                                             0.173723
## data_for_model$defined_populations_simplifieddispersal_buffer                           0.255060
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.569816
## data_for_model$defined_populations_simplifiedgenetic_clusters                           0.297834
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.656511
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.339347
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.315332
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     0.451562
## data_for_model$defined_populations_simplifiedmanagement_units                           0.682284
## data_for_model$defined_populations_simplifiedother                                      0.760649
## data_for_model$defined_populations_simplifiedother_combinations                         0.298183
##                                                                                       t value
## (Intercept)                                                                            -7.389
## data_for_model$defined_populations_simplifieddispersal_buffer                          -0.151
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        -0.003
## data_for_model$defined_populations_simplifiedgenetic_clusters                           2.667
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        1.807
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     2.049
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   1.532
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     1.561
## data_for_model$defined_populations_simplifiedmanagement_units                          -0.439
## data_for_model$defined_populations_simplifiedother                                      2.130
## data_for_model$defined_populations_simplifiedother_combinations                         1.318
##                                                                                       Pr(>|t|)
## (Intercept)                                                                           5.35e-13
## data_for_model$defined_populations_simplifieddispersal_buffer                          0.88015
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        0.99782
## data_for_model$defined_populations_simplifiedgenetic_clusters                          0.00788
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.07129
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.04096
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.12619
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    0.11913
## data_for_model$defined_populations_simplifiedmanagement_units                          0.66070
## data_for_model$defined_populations_simplifiedother                                     0.03361
## data_for_model$defined_populations_simplifiedother_combinations                        0.18797
##                                                                                          
## (Intercept)                                                                           ***
## data_for_model$defined_populations_simplifieddispersal_buffer                            
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          
## data_for_model$defined_populations_simplifiedgenetic_clusters                         ** 
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      .  
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries   *  
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      
## data_for_model$defined_populations_simplifiedmanagement_units                            
## data_for_model$defined_populations_simplifiedother                                    *  
## data_for_model$defined_populations_simplifiedother_combinations                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.7997607)
## 
##     Null deviance: 534.42  on 575  degrees of freedom
## Residual deviance: 518.25  on 565  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4

(A) Is there a relationship between number of maintained populations and Indicator2, overall, and/or with some methods?

Scatter plot of indicator2 vs extant pops

p4<- indicators_full %>%
  # filter outliers with too many pops and missing data
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator2)) %>%
  filter(!is.na(n_extant_populations)) %>%
  
  # plot
    ggplot(aes(x=n_extant_populations, y=indicator2, color=defined_populations_nicenames)) +
    geom_point() +
    theme_light() +
    scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
    theme(legend.position = "none") +
    ylab("Proportion of maintained populations") +
    xlab("Number of maintained populations") +
    theme(text = element_text(size = 13))
p4

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator2)) %>%
                      filter(!is.na(n_extant_populations)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                       78 
##                       eco_biogeo_proxies 
##                                       32 
##                         genetic_clusters 
##                                       51 
##      genetic_clusters eco_biogeo_proxies 
##                                       18 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      176 
## geographic_boundaries eco_biogeo_proxies 
##                                       68 
##   geographic_boundaries management_units 
##                                       17 
##                         management_units 
##                                       23 
##                                    other 
##                                        9 
##                       other_combinations 
##                                       78
# total n
nrow(data_for_model)
## [1] 591
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run GLM model including method. Note: the number of mantained populations varies a lot among countries, but it’s in the predictor here, not response, so don’t really need country as random factor.

This model will be unbalanced and it is more parametarized, but it would be the ideal if our dataset were larger:

# run model
m4 <- glm(data_for_model$indicator2 ~ data_for_model$n_extant_populations + 
            data_for_model$defined_populations_simplified + 
            data_for_model$n_extant_populations*data_for_model$defined_populations_simplified, family = "quasibinomial")

Summary:

summary(m4)
## 
## Call:
## glm(formula = data_for_model$indicator2 ~ data_for_model$n_extant_populations + 
##     data_for_model$defined_populations_simplified + data_for_model$n_extant_populations * 
##     data_for_model$defined_populations_simplified, family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0141  -0.3160   0.3548   0.6151   0.9267  
## 
## Coefficients:
##                                                                                                                            Estimate
## (Intercept)                                                                                                                1.380249
## data_for_model$n_extant_populations                                                                                        0.001082
## data_for_model$defined_populations_simplifieddispersal_buffer                                                             -0.469445
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                            0.058023
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                              1.478090
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                           3.155611
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                        0.651887
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                      0.193791
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                        0.370269
## data_for_model$defined_populations_simplifiedmanagement_units                                                              0.507064
## data_for_model$defined_populations_simplifiedother                                                                        -1.467012
## data_for_model$defined_populations_simplifiedother_combinations                                                            0.482000
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                          0.067182
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        0.002354
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                         -0.063257
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -0.152885
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries   -0.007302
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.006244
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    0.097350
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                         -0.044946
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                     0.708639
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                        0.000205
##                                                                                                                           Std. Error
## (Intercept)                                                                                                                 0.144909
## data_for_model$n_extant_populations                                                                                         0.003181
## data_for_model$defined_populations_simplifieddispersal_buffer                                                               0.311163
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                             0.380967
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                               0.576897
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                            1.395501
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                         0.376416
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                       0.270433
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                         0.724223
## data_for_model$defined_populations_simplifiedmanagement_units                                                               0.507625
## data_for_model$defined_populations_simplifiedother                                                                          1.031249
## data_for_model$defined_populations_simplifiedother_combinations                                                             0.291590
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                           0.040722
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.007198
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                           0.118534
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.061828
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.004592
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.004962
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     0.178852
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                           0.023443
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                      0.621806
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                         0.012476
##                                                                                                                           t value
## (Intercept)                                                                                                                 9.525
## data_for_model$n_extant_populations                                                                                         0.340
## data_for_model$defined_populations_simplifieddispersal_buffer                                                              -1.509
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                             0.152
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                               2.562
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                            2.261
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                         1.732
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                       0.717
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                         0.511
## data_for_model$defined_populations_simplifiedmanagement_units                                                               0.999
## data_for_model$defined_populations_simplifiedother                                                                         -1.423
## data_for_model$defined_populations_simplifiedother_combinations                                                             1.653
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                           1.650
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.327
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                          -0.534
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       -2.473
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    -1.590
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -1.258
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     0.544
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                          -1.917
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                      1.140
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                         0.016
##                                                                                                                           Pr(>|t|)
## (Intercept)                                                                                                                 <2e-16
## data_for_model$n_extant_populations                                                                                         0.7339
## data_for_model$defined_populations_simplifieddispersal_buffer                                                               0.1319
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                             0.8790
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                               0.0107
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                            0.0241
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                         0.0838
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                       0.4739
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                         0.6094
## data_for_model$defined_populations_simplifiedmanagement_units                                                               0.3183
## data_for_model$defined_populations_simplifiedother                                                                          0.1554
## data_for_model$defined_populations_simplifiedother_combinations                                                             0.0989
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                           0.0995
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.7438
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                           0.5938
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.0137
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.1123
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.2087
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     0.5864
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                           0.0557
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                      0.2549
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                         0.9869
##                                                                                                                              
## (Intercept)                                                                                                               ***
## data_for_model$n_extant_populations                                                                                          
## data_for_model$defined_populations_simplifieddispersal_buffer                                                                
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                              
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                             *  
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                          *  
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                       .  
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                        
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                          
## data_for_model$defined_populations_simplifiedmanagement_units                                                                
## data_for_model$defined_populations_simplifiedother                                                                           
## data_for_model$defined_populations_simplifiedother_combinations                                                           .  
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                         .  
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                            
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      *  
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies    
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                         .  
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                       
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.4784305)
## 
##     Null deviance: 256.01  on 590  degrees of freedom
## Residual deviance: 232.16  on 569  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7

We run a similar model than above, but limited to relationship of interest: glm(indicator2 ~ N_maintained_pops, family = “quasibinomial”).

m4.1 <- glm(data_for_model$indicator2 ~ data_for_model$n_extant_populations, family = "quasibinomial")
m4.1
## 
## Call:  glm(formula = data_for_model$indicator2 ~ data_for_model$n_extant_populations, 
##     family = "quasibinomial")
## 
## Coefficients:
##                         (Intercept)  data_for_model$n_extant_populations  
##                              1.6499                              -0.0025  
## 
## Degrees of Freedom: 590 Total (i.e. Null);  589 Residual
## Null Deviance:       256 
## Residual Deviance: 255.1     AIC: NA

Summary:

summary(m4.1)
## 
## Call:
## glm(formula = data_for_model$indicator2 ~ data_for_model$n_extant_populations, 
##     family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9108  -0.3905   0.5935   0.5941   0.7417  
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          1.649898   0.077770  21.215   <2e-16 ***
## data_for_model$n_extant_populations -0.002500   0.001622  -1.541    0.124    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.4362426)
## 
##     Null deviance: 256.01  on 590  degrees of freedom
## Residual deviance: 255.07  on 589  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4

Because “what’s a population and how do you define them?” is such an important question, we can also test the effect of methods alone. First, subset the data to only those taxa where a single method was used:

ind2_single_methods<-indicators_full %>% 
                      filter(!is.na(indicator2)) %>% 
                      filter(n_extant_populations<500) %>%  # doesn't make a difference in the test below, but useful for 
                      filter(defined_populations_simplified=="genetic_clusters" | 
                             defined_populations_simplified=="geographic_boundaries" |
                             defined_populations_simplified=="eco_biogeo_proxies" | 
                             defined_populations_simplified=="management_units" |
                             defined_populations_simplified=="dispersal_buffer")


# check number of methods
length(unique(ind2_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind2_single_methods$defined_populations_simplified)
## 
##      dispersal_buffer    eco_biogeo_proxies      genetic_clusters 
##                    78                    32                    51 
## geographic_boundaries      management_units 
##                   176                    23
# check n total
nrow(ind2_single_methods)
## [1] 360
# re-level to use geographic boundaries as reference category for the analysis
ind2_single_methods$defined_populations_simplified<-relevel(as.factor(ind2_single_methods$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model:

# run model
m4.2 <- glm(ind2_single_methods$indicator2 ~ ind2_single_methods$n_extant_populations + 
            ind2_single_methods$defined_populations_simplified + 
            ind2_single_methods$n_extant_populations*ind2_single_methods$defined_populations_simplified, family = "quasibinomial")

Summary:

summary(m4.2)
## 
## Call:
## glm(formula = ind2_single_methods$indicator2 ~ ind2_single_methods$n_extant_populations + 
##     ind2_single_methods$defined_populations_simplified + ind2_single_methods$n_extant_populations * 
##     ind2_single_methods$defined_populations_simplified, family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0141  -0.3358   0.3443   0.6686   0.7987  
## 
## Coefficients:
##                                                                                                                Estimate
## (Intercept)                                                                                                    1.380249
## ind2_single_methods$n_extant_populations                                                                       0.001082
## ind2_single_methods$defined_populations_simplifieddispersal_buffer                                            -0.469445
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                           0.058023
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters                                             1.478090
## ind2_single_methods$defined_populations_simplifiedmanagement_units                                             0.507064
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer    0.067182
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies  0.002354
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters   -0.063257
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units   -0.044946
##                                                                                                               Std. Error
## (Intercept)                                                                                                     0.153492
## ind2_single_methods$n_extant_populations                                                                        0.003369
## ind2_single_methods$defined_populations_simplifieddispersal_buffer                                              0.329594
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                            0.403533
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters                                              0.611069
## ind2_single_methods$defined_populations_simplifiedmanagement_units                                              0.537693
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer     0.043134
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies   0.007624
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters     0.125555
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units     0.024832
##                                                                                                               t value
## (Intercept)                                                                                                     8.992
## ind2_single_methods$n_extant_populations                                                                        0.321
## ind2_single_methods$defined_populations_simplifieddispersal_buffer                                             -1.424
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                            0.144
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters                                              2.419
## ind2_single_methods$defined_populations_simplifiedmanagement_units                                              0.943
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer     1.558
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies   0.309
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters    -0.504
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units    -1.810
##                                                                                                               Pr(>|t|)
## (Intercept)                                                                                                     <2e-16
## ind2_single_methods$n_extant_populations                                                                        0.7483
## ind2_single_methods$defined_populations_simplifieddispersal_buffer                                              0.1552
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                            0.8858
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters                                              0.0161
## ind2_single_methods$defined_populations_simplifiedmanagement_units                                              0.3463
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer     0.1202
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies   0.7577
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters     0.6147
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units     0.0712
##                                                                                                                  
## (Intercept)                                                                                                   ***
## ind2_single_methods$n_extant_populations                                                                         
## ind2_single_methods$defined_populations_simplifieddispersal_buffer                                               
## ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                             
## ind2_single_methods$defined_populations_simplifiedgenetic_clusters                                            *  
## ind2_single_methods$defined_populations_simplifiedmanagement_units                                               
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifieddispersal_buffer      
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedeco_biogeo_proxies    
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedgenetic_clusters      
## ind2_single_methods$n_extant_populations:ind2_single_methods$defined_populations_simplifiedmanagement_units   .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.5367873)
## 
##     Null deviance: 161.55  on 359  degrees of freedom
## Residual deviance: 150.03  on 350  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

(B) Is there a relationship between number of maintained populations and indicator1, overall, and/or with some methods?

Scatter plot of indicator1 vs extant pops

p5<- indicators_full %>%
  # filter outliers with too many pops and missing data
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator1)) %>%
  filter(!is.na(n_extant_populations)) %>%
  
  # plot
    ggplot(aes(x=n_extant_populations, y=indicator1, color=defined_populations_nicenames)) +
    geom_point() +
    theme_light() +
    scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
    theme(legend.position = "none") +
    ylab("Proportion of populations with Ne>500") +
    xlab("Number of maintained populations") +
    theme(text = element_text(size = 13))
p5

Prepare data for model (remove outliers and NA in desired variable) and check n:

# remove missing data 
data_for_model<-indicators_full %>% 
                      filter(!is.na(indicator1)) %>%
                      filter(!is.na(n_extant_populations)) %>%
                      filter(n_extant_populations<500) # doesn't make a difference in the test below, but useful for plots

# check number of methods
length(unique(data_for_model$defined_populations_simplified))
## [1] 11
# check n per method
table(data_for_model$defined_populations_simplified)
## 
##                         dispersal_buffer 
##                                      138 
##                       eco_biogeo_proxies 
##                                       16 
##                         genetic_clusters 
##                                       58 
##      genetic_clusters eco_biogeo_proxies 
##                                        8 
##   genetic_clusters geographic_boundaries 
##                                       41 
##                    geographic_boundaries 
##                                      156 
## geographic_boundaries eco_biogeo_proxies 
##                                       54 
##   geographic_boundaries management_units 
##                                       20 
##                         management_units 
##                                       13 
##                                    other 
##                                        6 
##                       other_combinations 
##                                       66
# total n
nrow(data_for_model)
## [1] 576
# re-level to use geographic boundaries as reference category for the analysis
data_for_model$defined_populations_simplified<-relevel(as.factor(data_for_model$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run GLM model including method. Note: the number of mantained populations varies a lot among countries, but it’s in the predictor here, not response, so don’t really need country as random factor.

This model will be unbalanced and it is more parametarized, but it would be the ideal if our dataset were larger:

# run model
m5 <- glm(data_for_model$indicator1 ~ data_for_model$n_extant_populations + 
            data_for_model$defined_populations_simplified + 
            data_for_model$n_extant_populations*data_for_model$defined_populations_simplified, family = "quasibinomial")

Summary:

summary(m5)
## 
## Call:
## glm(formula = data_for_model$indicator1 ~ data_for_model$n_extant_populations + 
##     data_for_model$defined_populations_simplified + data_for_model$n_extant_populations * 
##     data_for_model$defined_populations_simplified, family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1591  -0.7162  -0.6800   0.6010   2.1385  
## 
## Coefficients:
##                                                                                                                            Estimate
## (Intercept)                                                                                                               -1.221104
## data_for_model$n_extant_populations                                                                                       -0.008692
## data_for_model$defined_populations_simplifieddispersal_buffer                                                             -0.127582
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                            0.434654
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                              1.441713
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                           1.198465
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                        0.853593
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                      1.032943
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                        1.021245
## data_for_model$defined_populations_simplifiedmanagement_units                                                              0.057216
## data_for_model$defined_populations_simplifiedother                                                                        -0.321946
## data_for_model$defined_populations_simplifiedother_combinations                                                            0.609022
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                          0.010729
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                       -0.034863
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                         -0.299725
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies      -0.011963
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries   -0.039303
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies -0.190448
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units   -0.030238
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                         -0.062346
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                     0.827158
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                       -0.032267
##                                                                                                                           Std. Error
## (Intercept)                                                                                                                 0.195759
## data_for_model$n_extant_populations                                                                                         0.013804
## data_for_model$defined_populations_simplifieddispersal_buffer                                                               0.278722
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                             0.699869
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                               0.456497
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                            0.902629
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                         0.411905
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                       0.418439
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                         0.545687
## data_for_model$defined_populations_simplifiedmanagement_units                                                               0.930333
## data_for_model$defined_populations_simplifiedother                                                                          2.105910
## data_for_model$defined_populations_simplifiedother_combinations                                                             0.365521
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                           0.014656
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                         0.049333
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                           0.159492
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies        0.170854
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries     0.054660
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies   0.108501
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units     0.052574
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                           0.145942
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                      1.443840
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                         0.036880
##                                                                                                                           t value
## (Intercept)                                                                                                                -6.238
## data_for_model$n_extant_populations                                                                                        -0.630
## data_for_model$defined_populations_simplifieddispersal_buffer                                                              -0.458
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                             0.621
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                               3.158
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                            1.328
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                         2.072
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                       2.469
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                         1.871
## data_for_model$defined_populations_simplifiedmanagement_units                                                               0.062
## data_for_model$defined_populations_simplifiedother                                                                         -0.153
## data_for_model$defined_populations_simplifiedother_combinations                                                             1.666
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                           0.732
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        -0.707
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                          -1.879
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       -0.070
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    -0.719
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  -1.755
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    -0.575
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                          -0.427
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                      0.573
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                        -0.875
##                                                                                                                           Pr(>|t|)
## (Intercept)                                                                                                               8.82e-10
## data_for_model$n_extant_populations                                                                                        0.52918
## data_for_model$defined_populations_simplifieddispersal_buffer                                                              0.64732
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                            0.53482
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                              0.00167
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                           0.18481
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                        0.03870
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                      0.01387
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                        0.06180
## data_for_model$defined_populations_simplifiedmanagement_units                                                              0.95098
## data_for_model$defined_populations_simplifiedother                                                                         0.87855
## data_for_model$defined_populations_simplifiedother_combinations                                                            0.09624
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                          0.46446
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                        0.48006
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                          0.06074
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies       0.94420
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries    0.47242
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies  0.07977
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units    0.56542
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                          0.66940
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                     0.56695
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                        0.38200
##                                                                                                                              
## (Intercept)                                                                                                               ***
## data_for_model$n_extant_populations                                                                                          
## data_for_model$defined_populations_simplifieddispersal_buffer                                                                
## data_for_model$defined_populations_simplifiedeco_biogeo_proxies                                                              
## data_for_model$defined_populations_simplifiedgenetic_clusters                                                             ** 
## data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies                                             
## data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries                                       *  
## data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies                                     *  
## data_for_model$defined_populations_simplifiedgeographic_boundaries management_units                                       .  
## data_for_model$defined_populations_simplifiedmanagement_units                                                                
## data_for_model$defined_populations_simplifiedother                                                                           
## data_for_model$defined_populations_simplifiedother_combinations                                                           .  
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifieddispersal_buffer                            
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedeco_biogeo_proxies                          
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters                         .  
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters eco_biogeo_proxies         
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgenetic_clusters geographic_boundaries      
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries eco_biogeo_proxies .  
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedgeographic_boundaries management_units      
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedmanagement_units                            
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother                                       
## data_for_model$n_extant_populations:data_for_model$defined_populations_simplifiedother_combinations                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.797664)
## 
##     Null deviance: 534.42  on 575  degrees of freedom
## Residual deviance: 498.66  on 554  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7

We run a similar model than above, but limited to relationship of interest: glm(indicator1 ~ N_maintained_pops, family = “quasibinomial”).

m5.1 <- glm(data_for_model$indicator1 ~ data_for_model$n_extant_populations, family = "quasibinomial")
m5.1
## 
## Call:  glm(formula = data_for_model$indicator1 ~ data_for_model$n_extant_populations, 
##     family = "quasibinomial")
## 
## Coefficients:
##                         (Intercept)  data_for_model$n_extant_populations  
##                             -0.9137                              -0.0104  
## 
## Degrees of Freedom: 575 Total (i.e. Null);  574 Residual
## Null Deviance:       534.4 
## Residual Deviance: 530.6     AIC: NA

Summary:

summary(m5.1)
## 
## Call:
## glm(formula = data_for_model$indicator1 ~ data_for_model$n_extant_populations, 
##     family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8176  -0.8140  -0.7820   0.4714   2.8855  
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         -0.913677   0.096777  -9.441   <2e-16 ***
## data_for_model$n_extant_populations -0.010398   0.006151  -1.690   0.0915 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8750042)
## 
##     Null deviance: 534.42  on 575  degrees of freedom
## Residual deviance: 530.64  on 574  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

Because “what’s a population and how do you define them?” is such an important question, we can also test the effect of methods alone. First, subset the data to only those taxa where a single method was used:

ind1_single_methods<-indicators_full %>% 
                      filter(!is.na(indicator1)) %>% 
                      filter(n_extant_populations<500) %>%  # doesn't make a difference in the test below, but useful for 
                      filter(defined_populations_simplified=="genetic_clusters" | 
                             defined_populations_simplified=="geographic_boundaries" |
                             defined_populations_simplified=="eco_biogeo_proxies" | 
                             defined_populations_simplified=="management_units" |
                             defined_populations_simplified=="dispersal_buffer")


# check number of methods
length(unique(ind1_single_methods$defined_populations_simplified))
## [1] 5
# check n by method
table(ind1_single_methods$defined_populations_simplified)
## 
##      dispersal_buffer    eco_biogeo_proxies      genetic_clusters 
##                   138                    16                    58 
## geographic_boundaries      management_units 
##                   156                    13
# check n total
nrow(ind1_single_methods)
## [1] 381
# re-level to use geographic boundaries as reference category for the analysis
ind1_single_methods$defined_populations_simplified<-relevel(as.factor(ind1_single_methods$defined_populations_simplified),
                                                       ref="geographic_boundaries")

Run model:

# run model
m5.2 <- glm(ind1_single_methods$indicator1 ~ ind1_single_methods$n_extant_populations + 
            ind1_single_methods$defined_populations_simplified + 
            ind1_single_methods$n_extant_populations*ind1_single_methods$defined_populations_simplified, family = "quasibinomial")

Summary:

summary(m5.2)
## 
## Call:
## glm(formula = ind1_single_methods$indicator1 ~ ind1_single_methods$n_extant_populations + 
##     ind1_single_methods$defined_populations_simplified + ind1_single_methods$n_extant_populations * 
##     ind1_single_methods$defined_populations_simplified, family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1404  -0.7107  -0.6800   0.4115   1.7764  
## 
## Coefficients:
##                                                                                                                Estimate
## (Intercept)                                                                                                   -1.221104
## ind1_single_methods$n_extant_populations                                                                      -0.008692
## ind1_single_methods$defined_populations_simplifieddispersal_buffer                                            -0.127582
## ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                           0.434654
## ind1_single_methods$defined_populations_simplifiedgenetic_clusters                                             1.441713
## ind1_single_methods$defined_populations_simplifiedmanagement_units                                             0.057215
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifieddispersal_buffer    0.010729
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies -0.034863
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedgenetic_clusters   -0.299725
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedmanagement_units   -0.062346
##                                                                                                               Std. Error
## (Intercept)                                                                                                     0.192209
## ind1_single_methods$n_extant_populations                                                                        0.013553
## ind1_single_methods$defined_populations_simplifieddispersal_buffer                                              0.273668
## ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                            0.687177
## ind1_single_methods$defined_populations_simplifiedgenetic_clusters                                              0.448218
## ind1_single_methods$defined_populations_simplifiedmanagement_units                                              0.913223
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifieddispersal_buffer     0.014391
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies   0.048438
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedgenetic_clusters     0.156600
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedmanagement_units     0.143171
##                                                                                                               t value
## (Intercept)                                                                                                    -6.353
## ind1_single_methods$n_extant_populations                                                                       -0.641
## ind1_single_methods$defined_populations_simplifieddispersal_buffer                                             -0.466
## ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                            0.633
## ind1_single_methods$defined_populations_simplifiedgenetic_clusters                                              3.217
## ind1_single_methods$defined_populations_simplifiedmanagement_units                                              0.063
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifieddispersal_buffer     0.746
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies  -0.720
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedgenetic_clusters    -1.914
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedmanagement_units    -0.435
##                                                                                                               Pr(>|t|)
## (Intercept)                                                                                                   6.19e-10
## ind1_single_methods$n_extant_populations                                                                       0.52173
## ind1_single_methods$defined_populations_simplifieddispersal_buffer                                             0.64135
## ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                           0.52744
## ind1_single_methods$defined_populations_simplifiedgenetic_clusters                                             0.00141
## ind1_single_methods$defined_populations_simplifiedmanagement_units                                             0.95008
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifieddispersal_buffer    0.45641
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies  0.47214
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedgenetic_clusters    0.05640
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedmanagement_units    0.66348
##                                                                                                                  
## (Intercept)                                                                                                   ***
## ind1_single_methods$n_extant_populations                                                                         
## ind1_single_methods$defined_populations_simplifieddispersal_buffer                                               
## ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies                                             
## ind1_single_methods$defined_populations_simplifiedgenetic_clusters                                            ** 
## ind1_single_methods$defined_populations_simplifiedmanagement_units                                               
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifieddispersal_buffer      
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedeco_biogeo_proxies    
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedgenetic_clusters   .  
## ind1_single_methods$n_extant_populations:ind1_single_methods$defined_populations_simplifiedmanagement_units      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.7689951)
## 
##     Null deviance: 327.69  on 380  degrees of freedom
## Residual deviance: 314.32  on 371  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

Single plot 3 panels box plots for the effect of method on: number of populations, proportion of maintained populations (indicator 2) and Proportion of populations with Ne>500 (indicator 1).

Plot in three panels.

##### plot for Proportion of maintained populations (indicator 2) only with n in axis labels

# sample size 
sample_size <- indicators_full %>%
                    filter(!is.na(indicator2)) %>% 
                    filter(n_extant_populations<500) %>% 
                    group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator2)) %>% 
    # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
p2.1<- df %>%
  filter(n_extant_populations<500) %>%
  ggplot(aes(x=myaxis, y=indicator2, color=defined_populations_nicenames,    
                                     fill=defined_populations_nicenames)) +
          geom_boxplot() + xlab("") + ylab("Proportion of maintained populations") +
          geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  
  scale_x_discrete(limits=rev, 
                   labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
  theme(text = element_text(size = 13))


##### plot for Proportion populations Ne>500 (indicator 1) only with n in axis labels
# Prepare data for plot with nice labels:
# sample size of TOTAL populations
sample_size <- indicators_full %>%
  filter(!is.na(indicator1)) %>% 
  filter(n_extant_populations<500) %>% 
  group_by(defined_populations_nicenames) %>% summarize(num=n())

# custom axis
## new dataframe
df<-indicators_full %>% 
  filter(n_extant_populations<500) %>%
  filter(!is.na(indicator1)) %>% 
  # add sampling size 
  left_join(sample_size) %>%
  mutate(myaxis = as.factor(paste0(defined_populations_nicenames, " (n= ", num, ")")))
## Joining, by = "defined_populations_nicenames"
## plot 
p3.1<- df %>%
  ggplot(aes(x=myaxis, y=indicator1, color=defined_populations_nicenames,    
             fill=defined_populations_nicenames)) +
  geom_boxplot() + xlab("") + ylab("Proportion of populations with Ne>500") +
  geom_jitter(size=.4, width = 0.1, color="black") +
  coord_flip() +
  theme_light() +
  theme(panel.border = element_blank(), legend.position="none",
        plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")) + # this is used to decrease the space between plots) 
  scale_fill_manual(values=alpha(simplifiedmethods_colors, 0.3),
                    breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_color_manual(values=simplifiedmethods_colors,
                     breaks=levels(as.factor(indicators_full$defined_populations_nicenames))) +
  scale_x_discrete(limits=rev, 
                   labels= rev(sub(".*(\\(n= \\d+\\))", "\\1", levels(df$myaxis)))) + # extract "(n = number)") and show them in reverse order
  theme(text = element_text(size = 13))


## Plot 3 panels
plot_grid(p1, p2.1, p3.1, ncol=3, rel_widths = c(1.9,1,1), align = "h", labels=c("a)", "b)", "c)"))  

Single figure 2 panels scatter plots number of populations vs indicators

# plot
plot_grid(p4 + xlim(0,400) + xlab(""), # remove xlab from top plot and match x axis size
          p5+ xlim(0,400), 
          ncol=1, align = "v", labels=c("a)", "b)"))  

Indicator 1 and indicator 2 by type of range

All the following plots and analyses consider the average of multiassessed species, so that they are shown only once.

To have nicer looking plots, change “wide_ranging” for “wide ranging”:

indicators_averaged_one$species_range<-gsub("wide_ranging", "wide ranging", indicators_averaged_one$species_range)

Indicator 1 (Ne>5000)

Plot Indicator 1 by type of range in the entire dataset. Filtering NA in species range:

# get sample size by desired category
sample_size <- indicators_averaged_one  %>%
                    filter(!is.na(indicator1_mean)) %>% 
                    filter(!is.na(species_range)) %>% 
                    group_by(species_range) %>% summarize(num=n())

# plot
p1<-indicators_averaged_one %>% 
    filter(!is.na(indicator1_mean)) %>% 
    filter(!is.na(species_range)) %>% 
  # add sampling size 
  left_join(sample_size) %>% 
  mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%

  # plot
  ggplot(aes(x=myaxis, y=indicator1_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations with Ne>500") +
      coord_flip() +
      scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
                       labels=c("wide ranging", "restricted", "unknown"),
                       values=c("#00BFC4", "#F8766D", "grey80")) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p1

Plot Indicator 1 by country and type of range. Remove “unknown” and NA for better visualization.

### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")

# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))

## plot
df  %>% 
  # filter out "unknown" range
  filter(species_range =="wide ranging" | species_range =="restricted") %>% 
  
# plot
ggplot(aes(x=species_range, y=indicator1_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations with Ne>500") +
      coord_flip() +
      scale_x_discrete(breaks=c("wide_ranging", "restricted"),
                        labels=c("wide ranging", "restricted")) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
      facet_wrap(~facet, ncol = 5) +
      theme(panel.spacing = unit(1.5, "lines"))  
## Warning: Removed 662 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 662 rows containing missing values (`geom_point()`).

Test the effect of range type on Ne>500 indicator. Does the indicator vary between wide randing vs restricted distribution species? (keep only those categories and remove unknwon due to small sampling size)

## Remove unknown
data<- indicators_averaged_one  %>%
                    filter(!is.na(indicator1_mean)) %>% 
                    filter(species_range =="wide ranging" | species_range =="restricted")
  
## run model 
m1 <- glm(data$indicator1_mean ~ data$species_range, family = "quasibinomial")

# summary results
summary(m1)
## 
## Call:
## glm(formula = data$indicator1_mean ~ data$species_range, family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9928  -0.6431  -0.6431   0.5604   1.8318  
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.4710     0.1282 -11.478  < 2e-16 ***
## data$species_rangewide ranging   1.0199     0.1750   5.829 9.62e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.7758533)
## 
##     Null deviance: 498.19  on 540  degrees of freedom
## Residual deviance: 471.07  on 539  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
## + country
m1.1 <- glm(data$indicator1_mean ~ data$species_range + data$country_assessment,  family = "quasibinomial")

# summary results
summary(m1.1)
## 
## Call:
## glm(formula = data$indicator1_mean ~ data$species_range + data$country_assessment, 
##     family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3373  -0.7829  -0.5238   0.4785   2.1362  
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -2.1740     0.3624  -5.998 3.70e-09 ***
## data$species_rangewide ranging     1.2284     0.1956   6.281 7.01e-10 ***
## data$country_assessmentBelgium     0.1085     0.4019   0.270  0.78740    
## data$country_assessmentColombia    1.1416     0.4962   2.301  0.02180 *  
## data$country_assessmentFrance      1.1987     0.4218   2.842  0.00466 ** 
## data$country_assessmentJapan      -0.7796     0.5762  -1.353  0.17668    
## data$country_assessmentMexico      0.3781     0.4654   0.813  0.41684    
## data$country_assessmentS. Africa   1.1484     0.4158   2.762  0.00594 ** 
## data$country_assessmentSweden      0.2568     0.4224   0.608  0.54355    
## data$country_assessmentUS          1.3138     0.4082   3.218  0.00137 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.7391957)
## 
##     Null deviance: 498.19  on 540  degrees of freedom
## Residual deviance: 436.21  on 531  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

Indicator 2 (mantained populations)

Plot Indicator 2 by type of range in the entire dataset. Filtering NA in species range:

# get sample size by desired category
sample_size <- indicators_averaged_one  %>%
                    filter(!is.na(indicator2_mean)) %>% 
                    filter(!is.na(species_range)) %>% 
                    group_by(species_range) %>% summarize(num=n())

# plot
p2<-indicators_averaged_one %>% 
    filter(!is.na(indicator2_mean)) %>% 
    filter(!is.na(species_range)) %>% 
  # add sampling size 
  left_join(sample_size) %>% 
  mutate(myaxis = paste0(species_range, " (n= ", num, ")")) %>%

  # plot
  ggplot(aes(x=myaxis, y=indicator2_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations maintained") +
      coord_flip() +
      scale_fill_manual(breaks=c("wide ranging", "restricted", "unknown"),
                       labels=c("wide ranging", "restricted", "unknown"),
                       values=c("#00BFC4", "#F8766D", "grey80")) +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=20))
## Joining, by = "species_range"
p2

Plot Indicator 2 by country and type of range. We remove NA and unknown for better visualization.

### Duplicate dataframe to have a column with "all data" for faceting
df<-CreateAllFacet(indicators_averaged_one, "country_assessment")

# order with "all" as last
df$facet <- factor(df$facet, levels=c("Australia", "Belgium", "Colombia", "France", "Japan", "Mexico", "S. Africa", "Sweden", "US", "all"))

## plot
df  %>% 
  # filter out "unknown" range
  filter(species_range =="wide ranging" | species_range =="restricted") %>% 
  
# plot
ggplot(aes(x=species_range, y=indicator2_mean , fill=species_range)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations maintained") +
      coord_flip() +
      theme_light() +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
      facet_wrap(~facet, ncol = 5) +
      theme(panel.spacing = unit(1.5, "lines"))  
## Warning: Removed 688 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 688 rows containing missing values (`geom_point()`).

Test the effect of range type on the proportion of maitained populations. Does the indicator vary between wide randing vs restricted distribution species? Consider only wide randing and restricted categores (ie remove unknown due to small sampling size)

## Remove unknown
data<- indicators_averaged_one  %>%
                    filter(!is.na(indicator2_mean)) %>% 
                    filter(species_range =="wide ranging" | species_range =="restricted")
  
## run model 
m2 <- glm(data$indicator2_mean ~ data$species_range, family = "quasibinomial")

# summary results
summary(m1)
## 
## Call:
## glm(formula = data$indicator1_mean ~ data$species_range, family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9928  -0.6431  -0.6431   0.5604   1.8318  
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.4710     0.1282 -11.478  < 2e-16 ***
## data$species_rangewide ranging   1.0199     0.1750   5.829 9.62e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.7758533)
## 
##     Null deviance: 498.19  on 540  degrees of freedom
## Residual deviance: 471.07  on 539  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
## + country
m2.1 <- glm(data$indicator2_mean ~ data$species_range + data$country_assessment,  family = "quasibinomial")

# summary results
summary(m2.1)
## 
## Call:
## glm(formula = data$indicator2_mean ~ data$species_range + data$country_assessment, 
##     family = "quasibinomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8214  -0.2190   0.3212   0.5137   1.1585  
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        1.9718     0.4038   4.883 1.39e-06 ***
## data$species_rangewide ranging     0.5967     0.1779   3.355 0.000852 ***
## data$country_assessmentBelgium    -2.5237     0.4729  -5.337 1.42e-07 ***
## data$country_assessmentColombia   -0.3274     0.4824  -0.679 0.497565    
## data$country_assessmentFrance     -0.5243     0.5006  -1.047 0.295479    
## data$country_assessmentJapan       0.3702     0.5200   0.712 0.476920    
## data$country_assessmentMexico      0.5020     0.6258   0.802 0.422853    
## data$country_assessmentS. Africa   0.6370     0.4963   1.284 0.199870    
## data$country_assessmentSweden     -0.9612     0.4212  -2.282 0.022887 *  
## data$country_assessmentUS         -0.6485     0.4253  -1.525 0.127959    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.3846378)
## 
##     Null deviance: 226.79  on 527  degrees of freedom
## Residual deviance: 182.65  on 518  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

Single plot PM and Ne indicators by range type

plot_grid(p1, p2,  ncol=1, align = "v", labels=c("a)", "b)"))  

Indicator 1 and Indicator 2 by IUCN status

Indicator 1

Indicator 1 by global IUCN in the entire dataset:

## Global IUCN
## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               group_by(global_IUCN) %>% summarize(num=n())

# new df 
df<- indicators_averaged_one %>% 
     filter(!is.na(indicator1_mean)) %>% 
        # add sampling size 
        left_join(sample_size) %>%
        mutate(myaxis = paste0(global_IUCN, " (n= ", num, ")"))
## Joining, by = "global_IUCN"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis, 
                  #grep is used below to get the sample size, which may change depending on the data
                  levels=c(grep("cr", unique(df$myaxis), value = TRUE),
                          grep("en", unique(df$myaxis), value = TRUE),
                          grep("vu", unique(df$myaxis), value = TRUE),
                          grep("nt", unique(df$myaxis), value = TRUE),
                          grep("lc", unique(df$myaxis), value = TRUE),
                          grep("dd", unique(df$myaxis), value = TRUE),
                          grep("not_assessed", unique(df$myaxis), value = TRUE),
                          grep("unknown", unique(df$myaxis), value = TRUE)))

df$global_IUCN<-factor(df$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))

      
# plot
p1<-df %>%
    ggplot(aes(x=myaxis, y=indicator1 , fill=global_IUCN)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors, # iucn color codes
                        breaks=c(levels(df$global_IUCN))) +
      theme_light() +
      ggtitle("a) global Red List") +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15))
p1
## Warning: Removed 3 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 3 rows containing missing values (`geom_point()`).

Indicator 1 by regional IUCN in the entire dataset:

## Regional IUCN

## prepare data
# add sampling size
sample_size <- indicators_averaged_one %>%
               filter(!is.na(indicator1_mean)) %>% 
               group_by(regional_redlist) %>% summarize(num=n())

# new df 
df<- indicators_averaged_one %>% 
     filter(!is.na(indicator1_mean)) %>% 
        # add sampling size 
        left_join(sample_size) %>%
        mutate(myaxis = paste0(regional_redlist, " (n= ", num, ")"))
## Joining, by = "regional_redlist"
# change order of levels so that they are in the desired order
df$myaxis<-factor(df$myaxis, 
                  #grep is used below to get the sample size, which may change depending on the data
                  levels=c(grep("re", unique(df$myaxis), value = TRUE),
                          grep("cr", unique(df$myaxis), value = TRUE),
                          grep("en", unique(df$myaxis), value = TRUE),
                          grep("vu", unique(df$myaxis), value = TRUE),
                          grep("nt", unique(df$myaxis), value = TRUE),
                          grep("lc", unique(df$myaxis), value = TRUE),
                          grep("dd", unique(df$myaxis), value = TRUE),
                          grep("not_assessed", unique(df$myaxis), value = TRUE),
                          grep("unknown", unique(df$myaxis), value = TRUE)))

df$regional_redlist<-factor(df$regional_redlist, levels=c("re","cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))
      
# plot
p2<-df %>%
    ggplot(aes(x=myaxis, y=indicator1_mean , fill=regional_redlist)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors_regional, # iucn color codes
                        breaks=c(levels(df$regional_redlist))) +
      theme_light() +
      ggtitle("b) regional Red List") +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15))
p2
## Warning: Groups with fewer than two data points have been dropped.

Both together

plot_grid(p1, p2, ncol=2)
## Warning: Removed 3 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Groups with fewer than two data points have been dropped.

Indicator 1 by country and global IUCN

## change order of levels so that categories match with the order of colors
indicators_averaged_one$global_IUCN<-factor(indicators_averaged_one$global_IUCN, levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))

# plot
indicators_averaged_one %>% 
  # plot
  ggplot(aes(x=global_IUCN, y=indicator1_mean, fill=global_IUCN)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors, # iucn color codes
                    breaks=c(levels(indicator1$global_IUCN))) +
      theme_light() +
      ggtitle("global IUCN Redlist") +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=13)) +
      facet_wrap(~country_assessment, ncol = 3) +
      theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 348 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 348 rows containing missing values (`geom_point()`).

Indicator1 by regional IUCN Redlist, excluding US and Mexico becasue they don’t have a regional IUCN redlist.

## change order of levels so that categories match with the order of colors
indicators_averaged_one$regional_redlist<-factor(indicators_averaged_one$regional_redlist, levels=c("re","cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))

# plot
indicators_averaged_one %>% 
  # filter US and Mx
  filter(country_assessment!="Mexico", country_assessment!="US") %>%
  
  # plot
  ggplot(aes(x=regional_redlist, y=indicator1_mean, fill=regional_redlist)) +
      geom_violin(width=1, linewidth = 0)  +
      geom_jitter(size=.5, width = 0.1) +
      xlab("") + ylab("Proportion of populations with Ne>500") +
      coord_flip() +
      scale_fill_manual(values= IUCNcolors_regional, # iucn color codes
                    breaks=c(levels(indicator1$regional_redlist))) +
      theme_light() +
      ggtitle("regional IUCN Redlist") +
      theme(panel.border = element_blank(), legend.position="none", text= element_text(size=15)) +
      facet_wrap(~country_assessment, ncol = 4) +
      theme(panel.spacing = unit(1.5, "lines"))
## Warning: Removed 212 rows containing non-finite values (`stat_ydensity()`).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 212 rows containing missing values (`geom_point()`).

Indicator values by taxonomic group

Values of indicator 1 and indicator 2 for multiassessed species

#subset only with taxa assessed multiple times:
only_multi<-indicators_full %>% 
                          filter(multiassessment=="multiassessment") 

First, check how indicator 1 changes across the multiassessments.

p1<-only_multi %>% 
  # Keep rows with different values in indicator1 within each taxon group
  group_by(taxon) %>%
  filter(n_distinct(indicator1) > 1) %>%
  # plot
  ggplot(aes(x=taxon, y=indicator1)) +
          geom_line(colour="darkgrey") + 
          geom_point(aes(color=country_assessment)) +
  xlab("") + ylab("Proportion of populations with Ne>500") +
  labs(color="country") +
  ylim(0, 1)+
  coord_flip() +
  theme_light() + 
  theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p1
## Warning: Removed 7 rows containing missing values (`geom_line()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

Now check how Proportion of maintained populations (indicator 2) changes across the multiassessments.

p2<-only_multi %>% 
  # Keep rows with different values in indicator1 within each taxon group
  group_by(taxon) %>%
  filter(n_distinct(indicator2) > 1) %>%
  
  ggplot(aes(x=taxon, y=indicator2)) +
          geom_line(colour="darkgrey") + 
          geom_point(aes(color=country_assessment)) +
    scale_color_manual(values= scales::hue_pal()(4)[2:4]) + # last 3 colors to make them the same than the other plot
  xlab("") + ylab("Proportion of populations maintained") +
  labs(color="country") +
  coord_flip() +
  theme_light() + 
  theme(panel.border = element_blank(), legend.position="right", text= element_text(size=13))
p2
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Plot together:

plot_grid(p2, p1,  
          rel_heights = c(1.3, 0.9),
          ncol=1, labels=c("a)", "b)")) 
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).
## Warning: Removed 7 rows containing missing values (`geom_line()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

Indicator 3 (number of species with genetic diversity monitoring)

Indicator 3 refers to the number (count) of taxa by country in which genetic monitoring is occurring. This is stored in the variable temp_gen_monitoring as a “yes/no” answer for each taxon.

indicator3

Plot by global IUCN redlist status

# desired order of levels
indicators_full$global_IUCN<-factor(as.factor(indicators_full$global_IUCN), levels=c("cr", "en", "vu", "nt", "lc", "dd", "not_assessed", "unknown"))


## plot
indicators_full %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, temp_gen_monitoring, global_IUCN) %>%
                 filter(!duplicated(.)) %>%

                 # count "yes" in tem_gen_monitoring by country
                 filter(temp_gen_monitoring=="yes") %>%
ggplot(aes(x=country_assessment, fill=global_IUCN)) +
  geom_bar() +
  xlab("") + ylab("Number of taxa with temporal genetic diversity monitoring") +
  scale_fill_manual(values= IUCNcolors, # iucn color codes
                    breaks=levels(as.factor(indicators_full$global_IUCN))) +
      theme_light()

Relatively few taxa have genetic monitoring, but many have some sort of genetic study. Let’s check that with a Sankey Plot:

# first subset the ind3_data keeping only taxa assessed a single time, plust the first record of those assessed multiple times.
ind3_data_firstmulti<-ind3_data[!duplicated(cbind(ind3_data$taxon, ind3_data$country_assessment)), ]

# transform data to how ggsankey wants it
df <- ind3_data_firstmulti %>%
  make_long(country_assessment, temp_gen_monitoring, gen_studies)

# plot
ggplot(df, aes(x = x,
               next_x = next_x,
               node = node,
               next_node = next_node,
               fill = factor(node),
               label = node)) +
  geom_sankey(flow.alpha = 0.5, 
              show.legend = FALSE) +
  geom_sankey_label(size = 2.5, color = "black", fill = "white") +
  theme_sankey(base_size = 10) +

    # manually set flow fill according to desired color
                            # countries
  scale_fill_manual(values=c(scales::hue_pal()(length(unique(ind3_data_firstmulti$country_assessment))),  
                             # traffic light for monitoring
                             c("darkolivegreen", "brown3", "darkgrey"),
                             # nice soft colors for gen_studies
                             c("grey50", "grey35", "grey50", "brown3")),
                              
                    breaks=c(unique(ind3_data_firstmulti$country_assessment),
                             unique(ind3_data_firstmulti$temp_gen_monitoring),
                             unique(ind3_data_firstmulti$gen_studies))) +
  
  xlab("")
## Warning: Removed 2 rows containing missing values (`geom_label()`).

table(ind3_data_firstmulti$gen_studies)
## 
##        no     phylo phylo_pop       pop 
##       386       185       239        94

Count data:

ind3_data %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, gen_studies, temp_gen_monitoring) %>%
                 filter(!duplicated(.)) %>%

                 group_by(country_assessment, temp_gen_monitoring, gen_studies) %>%
                 summarise(n_studies=n())
## `summarise()` has grouped output by 'country_assessment',
## 'temp_gen_monitoring'. You can override using the `.groups` argument.

How many genetic studies ara available by country for species without temporal genetic diversity monitoring?

## plot
indicators_full %>%
                 # keep only one record if the taxon was assessed more than once within the country
                 select(country_assessment, taxon, temp_gen_monitoring, gen_studies) %>%
                 filter(!duplicated(.)) %>%
                 # keep only taxa without gen div monitoring
                 filter(temp_gen_monitoring=="no")%>%

ggplot(aes(x=country_assessment, fill=gen_studies)) +
  geom_bar() +
    scale_fill_manual(values=c("grey80", scales::hue_pal()(3)))+
  xlab("")  +
      theme_light()

Summary table of mean indicator values and n

The tables below show the indicator values and sampling size averaging them by country, taxonomic group, distribution type or IUCN global red list status. For this summary the mean of the multiassessed species was considering and counted as a single entry for the sampling size.

Codes for indicator names:

Codes for summary stats:

Summary stats by country:

x<-indicators_averaged_one %>% 
                group_by(country_assessment) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
country_assessment n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia 28 0.903 0.178 47 0.170 0.299 10
Belgium 27 0.453 0.221 101 0.246 0.381 10
Colombia 50 0.831 0.230 41 0.341 0.480 NA
France 34 0.854 0.278 55 0.416 0.471 7
Japan 50 0.925 0.152 50 0.077 0.180 0
Mexico 28 0.936 0.135 47 0.217 0.354 7
S. Africa 90 0.948 0.155 61 0.422 0.475 5
Sweden 120 0.777 0.271 81 0.192 0.334 20
US 117 0.794 0.244 75 0.370 0.415 6

Taxonomic groups

Summary stats by taxonomic group:

x<-indicators_averaged_one %>% 
                group_by(taxonomic_group) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
taxonomic_group n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
amphibian 43 0.833 0.244 24 0.159 0.258 9
angiosperm 144 0.841 0.239 186 0.179 0.313 6
bird 83 0.834 0.252 89 0.328 0.448 NA
bryophyte 4 0.688 0.252 2 0.250 0.354 0
fish 42 0.779 0.244 34 0.414 0.448 11
fungus 3 0.903 0.167 2 0.500 0.707 0
gymnosperm 9 0.975 0.050 15 0.161 0.353 0
invertebrate 77 0.671 0.309 64 0.278 0.406 4
mammal 80 0.937 0.161 95 0.419 0.461 22
other 13 0.856 0.142 6 0.000 0.000 3
pteridophytes 8 0.824 0.251 11 0.179 0.284 0
reptile 38 0.909 0.171 30 0.298 0.441 1

Detailed table:

x<-indicators_averaged_one %>% 
                group_by(country_assessment, taxonomic_group) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
country_assessment taxonomic_group n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia amphibian 0 NaN NA 1 0.000 NA 0
Australia angiosperm 2 0.700 0.424 15 0.115 0.276 1
Australia bird 9 1.000 0.000 9 0.167 0.264 2
Australia bryophyte 0 NaN NA 1 0.500 NA 0
Australia fish 1 1.000 NA 2 0.500 0.707 1
Australia gymnosperm 0 NaN NA 2 0.000 0.000 0
Australia invertebrate 1 0.500 NA 0 NaN NA 0
Australia mammal 3 0.750 0.250 10 0.303 0.359 3
Australia other 5 0.887 0.141 1 0.000 NA 3
Australia pteridophytes 0 NaN NA 1 0.000 NA 0
Australia reptile 7 0.958 0.078 5 0.050 0.112 0
Belgium amphibian 3 0.310 0.170 9 0.189 0.329 1
Belgium angiosperm 5 0.446 0.279 26 0.093 0.219 0
Belgium bryophyte 1 0.444 NA 1 0.000 NA 0
Belgium fish 5 0.570 0.153 9 0.206 0.352 2
Belgium gymnosperm 0 NaN NA 1 0.050 NA 0
Belgium invertebrate 10 0.444 0.259 30 0.323 0.416 3
Belgium mammal 3 0.444 0.192 19 0.447 0.497 4
Belgium pteridophytes 0 NaN NA 2 0.250 0.354 0
Belgium reptile 0 NaN NA 4 0.030 0.026 0
Colombia amphibian 2 0.625 0.177 0 NaN NA 0
Colombia angiosperm 6 1.000 0.000 6 0.000 0.000 0
Colombia bird 35 0.795 0.242 29 0.448 0.506 NA
Colombia fish 2 1.000 0.000 2 0.500 0.707 0
Colombia mammal 1 0.500 NA 1 0.000 NA 0
Colombia other 1 1.000 NA 1 0.000 NA 0
Colombia reptile 3 1.000 0.000 2 0.000 0.000 0
France amphibian 1 1.000 NA 1 0.000 NA 1
France angiosperm 3 0.667 0.577 6 0.583 0.492 0
France bird 11 0.852 0.259 20 0.342 0.460 1
France fish 1 0.167 NA 6 0.589 0.463 2
France fungus 1 1.000 NA 1 1.000 NA 0
France gymnosperm 1 1.000 NA 2 1.000 0.000 0
France invertebrate 3 0.700 0.265 7 0.405 0.508 0
France mammal 11 0.955 0.151 10 0.217 0.416 3
France other 1 0.900 NA 0 NaN NA 0
France reptile 1 1.000 NA 2 0.500 0.707 0
Japan angiosperm 39 0.931 0.130 39 0.061 0.148 0
Japan gymnosperm 4 1.000 0.000 4 0.000 0.000 0
Japan pteridophytes 7 0.847 0.262 7 0.210 0.316 0
Mexico amphibian 0 NaN NA 2 0.000 0.000 0
Mexico angiosperm 20 0.959 0.120 29 0.236 0.339 5
Mexico bird 1 0.667 NA 2 0.500 0.707 1
Mexico fish 0 NaN NA 0 NaN NA 0
Mexico gymnosperm 2 0.886 0.005 6 0.061 0.148 0
Mexico invertebrate 1 1.000 NA 0 NaN NA 0
Mexico mammal 3 0.867 0.231 3 0.000 0.000 1
Mexico pteridophytes 0 NaN NA 1 0.000 NA 0
Mexico reptile 1 1.000 NA 4 0.500 0.577 0
S. Africa amphibian 18 0.918 0.173 4 0.125 0.250 2
S. Africa angiosperm 12 0.833 0.277 10 0.060 0.190 0
S. Africa bird 11 1.000 0.000 11 0.327 0.467 1
S. Africa fish 9 1.000 0.000 4 0.297 0.477 0
S. Africa gymnosperm 1 1.000 NA 0 NaN NA 0
S. Africa invertebrate 0 NaN NA 0 NaN NA 0
S. Africa mammal 32 0.992 0.044 31 0.608 0.480 2
S. Africa reptile 7 0.869 0.254 1 1.000 NA 0
Sweden amphibian 13 0.891 0.183 7 0.232 0.233 5
Sweden angiosperm 22 0.622 0.259 18 0.159 0.258 0
Sweden bird 11 0.696 0.385 9 0.111 0.333 2
Sweden bryophyte 2 0.904 0.048 0 NaN NA 0
Sweden fish 7 0.738 0.290 4 0.299 0.476 4
Sweden fungus 2 0.855 0.205 1 0.000 NA 0
Sweden invertebrate 29 0.674 0.292 20 0.078 0.225 0
Sweden mammal 20 0.986 0.047 15 0.361 0.447 8
Sweden other 6 0.800 0.153 4 0.000 0.000 0
Sweden pteridophytes 1 0.667 NA 0 NaN NA 0
Sweden reptile 7 0.983 0.045 3 0.619 0.541 1
US amphibian 6 0.754 0.267 0 NaN NA 0
US angiosperm 35 0.867 0.181 37 0.348 0.402 0
US bird 5 0.741 0.205 9 0.254 0.375 2
US bryophyte 1 0.500 NA 0 NaN NA 0
US fish 17 0.737 0.198 7 0.615 0.448 2
US gymnosperm 1 1.000 NA 0 NaN NA 0
US invertebrate 33 0.730 0.324 7 0.533 0.493 1
US mammal 7 0.905 0.194 6 0.303 0.351 1
US reptile 12 0.823 0.202 9 0.302 0.460 0

IUCN

Summary stats:

x<-indicators_averaged_one %>% 
                group_by(global_IUCN) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
global_IUCN n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
cr 40 0.843 0.263 44 0.114 0.289 8
en 59 0.786 0.254 47 0.265 0.418 9
vu 73 0.805 0.248 65 0.312 0.417 4
nt 50 0.849 0.249 50 0.237 0.375 7
lc 154 0.849 0.250 179 0.377 0.439 32
dd 9 0.707 0.313 10 0.442 0.490 2
not_assessed 157 0.838 0.233 159 0.184 0.326 3
unknown 2 1.000 0.000 3 0.667 0.577 0
NA 0 NaN NA 1 0.000 NA NA

Detailed table by IUCN category:

x<-indicators_averaged_one %>% 
                group_by(country_assessment, global_IUCN) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
country_assessment global_IUCN n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia cr 5 0.860 0.219 10 0.000 0.000 3
Australia en 4 0.850 0.300 7 0.167 0.264 2
Australia vu 6 0.943 0.101 8 0.260 0.355 1
Australia nt 4 1.000 0.000 5 0.353 0.328 0
Australia lc 3 1.000 0.000 8 0.229 0.367 1
Australia not_assessed 6 0.822 0.202 9 0.128 0.329 3
Australia unknown 0 NaN NA 0 NaN NA 0
Belgium cr 1 0.333 NA 2 0.500 0.707 0
Belgium en 1 0.455 NA 1 0.000 NA 0
Belgium vu 3 0.548 0.410 3 0.333 0.577 0
Belgium nt 2 0.310 0.034 13 0.030 0.058 3
Belgium lc 19 0.466 0.215 64 0.285 0.397 7
Belgium dd 1 0.333 NA 3 0.364 0.553 0
Belgium not_assessed 0 NaN NA 14 0.151 0.292 0
Belgium unknown 0 NaN NA 1 1.000 NA 0
Colombia cr 7 0.843 0.270 7 0.000 0.000 0
Colombia en 5 0.620 0.247 3 0.667 0.577 0
Colombia vu 20 0.812 0.225 15 0.133 0.352 0
Colombia nt 11 0.877 0.225 6 0.667 0.516 0
Colombia lc 7 0.952 0.126 9 0.667 0.500 0
Colombia NA 0 NaN NA 1 0.000 NA NA
France cr 2 0.583 0.589 5 0.040 0.089 1
France en 1 1.000 NA 3 0.333 0.577 1
France vu 4 0.725 0.320 9 0.481 0.467 0
France nt 7 0.839 0.277 6 0.333 0.516 0
France lc 17 0.953 0.133 28 0.476 0.482 4
France dd 0 NaN NA 2 1.000 0.000 1
France not_assessed 3 0.633 0.551 2 0.000 0.000 0
Japan cr 1 1.000 NA 1 0.000 NA 0
Japan not_assessed 49 0.923 0.153 49 0.079 0.181 0
Mexico cr 4 1.000 0.000 3 0.333 0.577 1
Mexico en 9 0.919 0.163 12 0.083 0.289 3
Mexico vu 5 0.900 0.224 5 0.000 0.000 1
Mexico nt 1 0.889 NA 2 0.000 0.000 0
Mexico lc 5 0.936 0.092 12 0.497 0.367 2
Mexico dd 1 1.000 NA 1 0.333 NA 0
Mexico not_assessed 3 0.958 0.072 12 0.158 0.318 0
S. Africa cr 14 0.860 0.285 12 0.042 0.144 2
S. Africa en 16 0.895 0.182 9 0.467 0.469 1
S. Africa vu 14 0.982 0.067 12 0.500 0.522 1
S. Africa nt 8 0.969 0.088 8 0.253 0.356 0
S. Africa lc 34 1.000 0.000 18 0.667 0.485 1
S. Africa dd 1 1.000 NA 0 NaN NA 0
S. Africa not_assessed 2 0.750 0.354 1 0.000 NA 0
S. Africa unknown 1 1.000 NA 1 1.000 NA 0
Sweden en 5 0.489 0.208 2 0.050 0.071 0
Sweden vu 7 0.685 0.247 7 0.297 0.363 1
Sweden nt 8 0.816 0.273 5 0.054 0.074 1
Sweden lc 63 0.836 0.259 39 0.258 0.380 17
Sweden dd 4 0.549 0.299 4 0.250 0.500 1
Sweden not_assessed 33 0.744 0.268 24 0.085 0.228 0
US cr 6 0.828 0.164 4 0.583 0.419 1
US en 18 0.743 0.268 10 0.300 0.483 2
US vu 14 0.664 0.271 6 0.464 0.323 0
US nt 9 0.796 0.289 5 0.284 0.435 3
US lc 6 0.791 0.208 1 0.000 NA 0
US dd 2 0.917 0.118 0 NaN NA 0
US not_assessed 61 0.829 0.234 48 0.379 0.418 0
US unknown 1 1.000 NA 1 0.000 NA 0

Distribution type

Summary stats:

x<-indicators_averaged_one %>% 
                group_by(species_range) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))

# nice table
kable(x, digits=3)
species_range n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
restricted 332 0.810 0.262 310 0.188 0.345 24
unknown 18 0.832 0.250 19 0.316 0.478 1
wide ranging 194 0.867 0.217 228 0.388 0.434 40
NA 0 NaN NA 1 0.000 NA NA

Detailed table by IUCN category:

x<-indicators_averaged_one %>% 
                group_by(country_assessment, species_range) %>%
                summarise(n.PM.ind=sum(!is.na(indicator2)), 
                          mean.PM.ind=mean(indicator2, na.rm=TRUE),
                          sd.PM.ind=sd(indicator2, na.rm=TRUE),
                          n.Ne.ind=sum(!is.na(indicator1)), 
                          mean.Ne.ind=mean(indicator1, na.rm=TRUE),
                          sd.Ne.ind=sd(indicator1, na.rm=TRUE),
                          Mon.ind=sum(temp_gen_monitoring=="yes"))
## `summarise()` has grouped output by 'country_assessment'. You can override
## using the `.groups` argument.
# nice table
kable(x, digits=3)
country_assessment species_range n.PM.ind mean.PM.ind sd.PM.ind n.Ne.ind mean.Ne.ind sd.Ne.ind Mon.ind
Australia restricted 14 0.865 0.224 27 0.114 0.253 4
Australia unknown 0 NaN NA 1 0.000 NA 0
Australia wide ranging 14 0.942 0.110 19 0.260 0.347 6
Belgium restricted 10 0.319 0.128 22 0.135 0.262 1
Belgium unknown 2 0.456 0.062 5 0.000 0.000 1
Belgium wide ranging 15 0.542 0.242 74 0.295 0.411 8
Colombia restricted 39 0.842 0.227 28 0.286 0.460 0
Colombia unknown 9 0.785 0.264 9 0.556 0.527 0
Colombia wide ranging 2 0.833 0.236 3 0.333 0.577 0
Colombia NA 0 NaN NA 1 0.000 NA NA
France restricted 14 0.741 0.336 28 0.227 0.388 2
France wide ranging 20 0.933 0.202 27 0.611 0.476 5
Japan restricted 35 0.939 0.141 35 0.080 0.180 0
Japan unknown 1 1.000 NA 1 0.000 NA 0
Japan wide ranging 14 0.884 0.179 14 0.076 0.192 0
Mexico restricted 19 0.933 0.138 31 0.094 0.267 4
Mexico unknown 2 1.000 0.000 0 NaN NA 0
Mexico wide ranging 7 0.926 0.150 16 0.456 0.385 3
S. Africa restricted 41 0.905 0.206 29 0.217 0.391 4
S. Africa unknown 2 1.000 0.000 1 1.000 NA 0
S. Africa wide ranging 47 0.984 0.081 31 0.595 0.475 1
Sweden restricted 71 0.708 0.292 52 0.077 0.212 6
Sweden unknown 2 1.000 0.000 2 0.000 0.000 0
Sweden wide ranging 47 0.871 0.204 27 0.426 0.411 14
US restricted 89 0.813 0.243 58 0.378 0.420 3
US unknown 0 NaN NA 0 NaN NA 0
US wide ranging 28 0.735 0.244 17 0.339 0.407 3

Session Info for reproducibility purposes:

sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.39         lme4_1.1-31        Matrix_1.5-3       cowplot_1.1.1     
##  [5] viridis_0.6.3      viridisLite_0.4.0  alluvial_0.1-2     ggsankey_0.0.99999
##  [9] ggplot2_3.4.1      stringr_1.4.0      utile.tools_0.2.7  readr_2.1.2       
## [13] dplyr_1.0.9        tidyr_1.2.0       
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.2 xfun_0.31        bslib_0.3.1      purrr_0.3.4     
##  [5] splines_4.2.1    lattice_0.20-45  colorspace_2.0-3 vctrs_0.5.2     
##  [9] generics_0.1.3   htmltools_0.5.5  yaml_2.3.5       utf8_1.2.2      
## [13] rlang_1.0.6      nloptr_2.0.3     jquerylib_0.1.4  pillar_1.7.0    
## [17] glue_1.6.2       withr_2.5.0      DBI_1.1.3        lifecycle_1.0.3 
## [21] munsell_0.5.0    gtable_0.3.0     evaluate_0.15    labeling_0.4.2  
## [25] tzdb_0.3.0       fastmap_1.1.0    fansi_1.0.3      highr_0.9       
## [29] Rcpp_1.0.10      scales_1.2.0     jsonlite_1.8.0   farver_2.1.1    
## [33] gridExtra_2.3    hms_1.1.1        digest_0.6.29    stringi_1.7.6   
## [37] grid_4.2.1       cli_3.6.0        tools_4.2.1      magrittr_2.0.3  
## [41] sass_0.4.1       tibble_3.1.7     crayon_1.5.1     pkgconfig_2.0.3 
## [45] ellipsis_0.3.2   MASS_7.3-57      minqa_1.2.5      assertthat_0.2.1
## [49] rmarkdown_2.14   rstudioapi_0.13  boot_1.3-28      R6_2.5.1        
## [53] nlme_3.1-157     compiler_4.2.1